Révision 2094

tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/ChercherRemplacer.txt (revision 2094)
1
Macro ChercherRemplacer
2
Auteur : Matthieu QUIGNARD
3
Version : 05 Février 2019
4

  
5
Retouche la valeur d'une propriété pour la remplacer par une autre.
6
Par exemple : CATEGORIE=PRO.CHECK => CATEGORIE=ERREUR
7

  
8
Possibilité d'inclure aussi les mentions dont la valeur initiale est vide.
9
Par exemple : CATEGORIE=          => CATEGORIE=ERREUR
10

  
11
NB : on peut utiliser cette macro pour retoucher le nom des référents.
12
Exemple : REF=roi de france       => REF=Le Roi de France
13

  
0 14

  
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/ChercherSupprimerMacro.groovy (revision 2094)
1
// ChercherSupprimer
2
// Auteur Matthieu Quignard
3
// Date : 14 janvier 2019
4

  
5
/**********
6
Sert à supprimer des mentions qui ont une valeur particulière attribuée
7
Par exemple : CATEGORIE=ERREUR
8
ou bien : REF=NON_REF
9
ou encore : CHECK=
10

  
11
ATTENTION : CETTE MACRO N'EST PAS REVERSIBLE
12
***********/
13

  
14
package org.txm.macroprototypes.urs.misc
15

  
16
import org.apache.commons.lang.*
17
import org.kohsuke.args4j.*
18
import groovy.transform.*
19
import org.txm.*
20
import org.txm.rcpapplication.swt.widget.parameters.*
21
import org.txm.analec.*
22
import org.txm.searchengine.cqp.*
23
import org.txm.searchengine.cqp.corpus.*
24
import visuAnalec.Message.*
25
import visuAnalec.donnees.*
26
import visuAnalec.elements.*
27
import visuAnalec.vue.*
28

  
29
// CORPS DU SCRIPT
30

  
31
if (!(corpusViewSelection instanceof MainCorpus)) {
32
	println "Corpora selection is not a Corpus"
33
	return
34
}
35

  
36
// BEGINNING OF PARAMETERS
37
@Field @Option(name="unit_type", usage="Unité", widget="String", required=true, def="MENTION")
38
def unit_type
39
@Field @Option(name="prop_name", usage="Propriété", widget="String", required=true, def="CATEGORIE")
40
def prop_name
41
@Field @Option(name="val_cherche", usage="Valeur recherchée", widget="String", required=true, def="")
42
def val_cherche
43
@Field @Option(name="inclureVides", usage="Inclure les valeurs vides", widget="Boolean", required=true, def="true")
44
def inclureVides
45

  
46
if (!ParametersDialog.open(this)) return
47

  
48
corpus = corpusViewSelection
49
analecCorpus = AnalecCorpora.getCorpus(corpus.getName())
50
vue = AnalecCorpora.getVue(corpus.getName())
51
structure = analecCorpus.getStructure()
52

  
53
if (!structure.getUnites().contains(unit_type)) { // abort when the annotation structure declares no unit type named unit_type
54
	println "Erreur : le corpus ne contient pas d'unité de type $unit_type"
55
	println "Script terminé"
56
	return
57
}
58

  
59
if (!structure.getUniteProperties(unit_type).contains(prop_name)) { 
60
	println "Erreur : les unités $unit_type n'ont pas de propriété $prop_name"
61
	println "Script terminé"
62
	return
63
}
64

  
65
println "Option 'inclure les valeurs vides' : $inclureVides"
66

  
67
def nDeleted = 0
68
def nIgnored = 0
69

  
70
def units = analecCorpus.getUnites(unit_type)
71
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
72

  
73
def garbageBin = []
74

  
75
for (Unite unit : units) { // process all units
76
    def val = unit.getProp( prop_name )
77
    
78
    if ( (val == val_cherche) || ( inclureVides && (val == "")) ) {
79
    	garbageBin.add( unit )
80
    	nDeleted++
81
    } else {
82
    	nIgnored++
83
    }
84
}
85

  
86
// Suppression effective des unités ciblées
87
garbageBin.each {
88
   analecCorpus.supUnite( it )
89
}
90

  
91
if (nDeleted > 0) corpus.setIsModified(true);
92

  
93
println "Result:"
94
println "- $nDeleted units of type $unit_type have been deleted."
95
println "- $nIgnored units of type $unit_type have not been modified."
96

  
97
AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut()
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/ChercherRemplacerMacro.groovy (revision 2094)
1
// ChercherRemplacer
2
// Auteur Matthieu Quignard
3
// Date : 14 janvier 2019
4

  
5
/**********
6
Sert à retoucher une valeur attribuée à une mention et la remplacer par une autre
7
Par exemple : CATEGORIE=PRO.CHECK => CATEGORIE=PRO.PER
8
ou bien : REF=roi de France => REF=Le Roi de France
9
ou encore : REF=      => REF=<EMPTY>
10
***********/
11

  
12
package org.txm.macroprototypes.urs.misc
13

  
14
import org.apache.commons.lang.*
15
import org.kohsuke.args4j.*
16
import groovy.transform.*
17
import org.txm.*
18
import org.txm.rcpapplication.swt.widget.parameters.*
19
import org.txm.analec.*
20
import org.txm.searchengine.cqp.*
21
import org.txm.searchengine.cqp.corpus.*
22
import visuAnalec.Message.*
23
import visuAnalec.donnees.*
24
import visuAnalec.elements.*
25
import visuAnalec.vue.*
26

  
27
// CORPS DU SCRIPT
28

  
29
if (!(corpusViewSelection instanceof MainCorpus)) {
30
	println "Corpora selection is not a Corpus"
31
	return
32
}
33

  
34
// BEGINNING OF PARAMETERS
35
@Field @Option(name="unit_type", usage="Unité", widget="String", required=true, def="MENTION")
36
def unit_type
37
@Field @Option(name="prop_name", usage="Propriété", widget="String", required=true, def="CATEGORIE")
38
def prop_name
39
@Field @Option(name="val_cherche", usage="Valeur recherchée", widget="String", required=true, def="")
40
def val_cherche
41
@Field @Option(name="val_remplace", usage="Valeur de remplacement", widget="String", required=true, def="")
42
def val_remplace
43
@Field @Option(name="inclureVides", usage="Inclure les valeurs vides", widget="Boolean", required=true, def="true")
44
def inclureVides
45

  
46
if (!ParametersDialog.open(this)) return
47

  
48
corpus = corpusViewSelection
49
analecCorpus = AnalecCorpora.getCorpus(corpus.getName())
50
vue = AnalecCorpora.getVue(corpus.getName())
51
structure = analecCorpus.getStructure()
52

  
53
if (!structure.getUnites().contains(unit_type)) { // abort when the annotation structure declares no unit type named unit_type
54
	println "Erreur : le corpus ne contient pas d'unité de type $unit_type"
55
	println "Script terminé"
56
	return
57
}
58

  
59
if (!structure.getUniteProperties(unit_type).contains(prop_name)) { 
60
	println "Erreur : les unités $unit_type n'ont pas de propriété $prop_name"
61
	println "Script terminé"
62
	return
63
}
64

  
65
println "Option 'inclure les valeurs vides' : $inclureVides"
66

  
67
def nModified = 0
68
def nIgnored = 0
69

  
70
def units = analecCorpus.getUnites(unit_type)
71
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
72

  
73
for (Unite unit : units) { // process all units
74
    def val = unit.getProp( prop_name )
75
    
76
    if ( (val == val_cherche) || ( inclureVides && (val == "")) ) {
77
    	vue.setValeurChamp(unit, prop_name, val_remplace)
78
    	nModified++
79
    } else {
80
    	nIgnored++
81
    }
82
}
83

  
84

  
85

  
86
if (nModified > 0) corpus.setIsModified(true);
87

  
88
println "Result:"
89
println "- $nModified units of type $unit_type have been modified."
90
println "- $nIgnored units of type $unit_type have not been modified."
91

  
92
AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut()
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/ChercherSupprimer.txt (revision 2094)
1
Macro ChercherSupprimer
2
Auteur : Matthieu QUIGNARD
3
Version : 05 Février 2019
4

  
5
Supprime de façon **DEFINITIVE** les mentions qui ont une certaine valeur de propriété.
6
Comme pour ChercherRemplacer, on peut choisir d'inclure aussi 
7
les mentions qui ont une valeur vide pour la propriété donnée.
8

  
9
Par exemple : 
10
    CATEGORIE=ERREUR
11
    CHECK=
12
    REF=NON_REF (suppression des mentions non référentielles)
13
    REF=SI (suppression de tous les singletons)
14
    
15
    
0 16

  
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsSummaryMacro.groovy (revision 2094)
43 43
		int maximum_schema_size
44 44
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
45 45
		String unit_ursql
46
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
47
		int limit_distance_in_schema
48
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=false, def="")
49
		limit_cql
46
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
47
		int position_in_schema
48
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
49
		cql_limit
50 50
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true")
51 51
		boolean strict_inclusion
52
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
53
		int limit_distance
52
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
53
		int position
54 54
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
55 55
		debug
56 56
if (!ParametersDialog.open(this)) return
......
70 70
		props.addAll(analecCorpus.getStructure().getUniteProperties(type));
71 71

  
72 72
	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, 
73
	unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
73
	unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
74 74

  
75 75
	allresults[corpus] = selectedUnits;
76 76
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsReferentialDensityMacro.groovy (revision 2094)
20 20

  
21 21
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
22 22
String unit_ursql
23
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
24
limit_cql
23
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
24
cql_limit
25 25
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
26 26
strict_inclusion
27
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
28
limit_distance
27
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
28
position
29 29

  
30 30
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
31 31
debug
......
40 40
int nMots = corpus.getSize();
41 41

  
42 42
def units = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, "", 0, 0,
43
	unit_ursql, 0, limit_cql, strict_inclusion, limit_distance);
43
	unit_ursql, 0, cql_limit, strict_inclusion, position);
44 44

  
45 45
int nUnites = units.size();
46 46

  
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsStabilityScoreMacro.groovy (revision 2094)
36 36
@Field @Option(name="word_property", usage="", widget="String", required=false, def="word")
37 37
String word_property
38 38

  
39
@Field @Option(name="show_values", usage="", widget="Boolean", required=false, def="false")
40
boolean show_values
41

  
39 42
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
40 43
debug
41 44

  
......
60 63
def CQI = CQPSearchEngine.getCqiClient()
61 64

  
62 65
def prop = corpus.getProperty(word_property)
63

  
66
if (prop == null) { // no CQP property called $word_property
67
	errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], word_property)
68
	if (errors.size() > 0) {
69
		println "** $word_property unit property cannot be computed in the corpus with types: $errors."
70
		return;
71
	}
72
}
64 73
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
65 74
allFormesSet = new HashSet();
66
nUnitesGrandTotal = 0;
67 75
def coefs = []
68 76
int n = 1
77

  
78
int nUnitesAllSchemas = 0
79
int nUnitesTotalSchemas = 0
80

  
69 81
for (def schema : schemas) {
70 82
	def formesSet = new HashSet(); // contient toutes les formes du CR courant
71 83
	nUnitesTotal = 0;
......
73 85
	def allUnites = schema.getUnitesSousjacentesNonTriees()
74 86

  
75 87
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
76
	def nUnites = units.size()
88
	def nUnites = schema.getUnitesSousjacentes().size()
89
	def nUnitesTotal = units.size()
77 90
	for (def unit : units) {
78 91
	
79 92
		String forme =  null;
......
88 101
		}
89 102
		
90 103
		formesSet.add(forme)
91
		
92
		nUnitesTotal++
93 104
	}
94
	if (formesSet.size() == 0) {
95
		coef = -1
105
	
106
	if (formesSet.size() == 0 || nUnitesTotal == 0) {
107
		coef = "NA"
96 108
	} else {
97 109
		coef = (nUnitesTotal/formesSet.size())
98 110
	}
99 111
	coefs << coef
100
	nUnitesGrandTotal += nUnitesTotal;
101 112
	allFormesSet.addAll(formesSet)
102 113
	
103
	if (schema_display_property_name != null) {
114
	if (schema_display_property_name != null && schema_display_property_name.length() > 0) {
104 115
		print schema.getProp(schema_display_property_name)
105 116
	} else {
106 117
		print schema_ursql+"-"+n+" : "
107 118
	}
108 119
	
109
	println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} forms = $coef"
120
	println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} ${word_property}s = $coef"
121
	if (show_values) {
122
		println "\t${word_property}s="+formesSet
123
	}
110 124
	n++
125
	
126
	nUnitesAllSchemas += nUnites
127
	nUnitesTotalSchemas += nUnitesTotal
111 128
}
112 129

  
113
return ["result":coefs, "data":["nUnitesTotal":nUnitesGrandTotal, "allFormesSet":allFormesSet], "coef":(nUnitesGrandTotal/allFormesSet.size())]
130
coef = (allFormesSet.size() == 0) ? "NA" : nUnitesTotalSchemas/allFormesSet.size() // guard against division by zero when no form was collected; "NA" mirrors the per-schema fallback
131
//println "ALL : ($nUnitesAllSchemas units) : $nUnitesTotalSchemas selected units / ${allFormesSet.size()} ${word_property}s = $coef"
132

  
133
return ["result":coefs, "data":["nUnitesTotal":nUnitesTotalSchemas, "allFormesSet":allFormesSet], "coef":(coef)]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsIndexMacro.groovy (revision 2094)
51 51
		String unit_property_display
52 52
@Field @Option(name="cqp_property_display", usage="Word property to display instead of the unit property", widget="String", required=false, def="")
53 53
		String cqp_property_display
54
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
55
		int limit_distance_in_schema
56
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
57
		limit_cql
54
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
55
		int position_in_schema
56
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
57
		cql_limit
58 58
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
59 59
		strict_inclusion
60
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
61
		limit_distance
62
@Field @Option(name="output_2D", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true")
63
		output_2D
60
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
61
		position
62
//@Field @Option(name="output_2D", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true")
63
		output_2D = true
64 64
@Field @Option(name="output_showlegend", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true")
65 65
		output_showlegend
66 66
@Field @Option(name="output_fmin", usage="minimal frequency displayed", widget="Integer", required=true, def="0")
......
115 115
	}
116 116

  
117 117
	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
118
			unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
118
			unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
119 119
	selectedUnits = new HashSet(selectedUnits)
120 120
	def counts = null
121 121
	if (cqp_property_display != null && cqp_property_display.length() > 0) {
......
163 163
else { corpus = corpusViewSelection }
164 164

  
165 165
def title = "${corpus.getMainCorpus()}.${corpusViewSelection}\n${unit_ursql}"
166
title += "[${limit_distance}]."
166
title += "[${position}]."
167 167
if (cqp_property_display.length() > 0) title += "${cqp_property_display} frequencies"
168 168
else if (unit_property_display.length() > 0) title += "${unit_property_display} frequencies"
169
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) title += "\n(${limit_cql} limits)"
169
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) title += "\n(${cql_limit} limits)"
170 170

  
171
if (cqp_property_display.length() > 0) println "Index de la propriété $cqp_property_display des mots des unités $unit_ursql[$limit_distance] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
172
else println "Index de la propriété $unit_property_display des unités $unit_ursql[$limit_distance] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
171
if (cqp_property_display.length() > 0) println "Index de la propriété $cqp_property_display des mots des unités $unit_ursql[$position] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
172
else println "Index de la propriété $unit_property_display des unités $unit_ursql[$position] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
173 173
println "$unit_property_display\t"+selection.join("\t")
174 174

  
175 175
keys.eachWithIndex { prop_val, i ->
......
223 223
		   library(latticeExtra)
224 224
		   library(lattice)
225 225
		   library(RColorBrewer)
226
		   cloud(value~col+row, m, panel.3d.cloud=panel.3dbars, col.facet='grey', xbase=0.4, ybase=0.4, scales=list(arrows=FALSE, col=1), par.settings = list(axis.line = list(col = "transparent")))
226
		   cloud(value~col+row, ..., panel.3d.cloud=panel.3dbars, col.facet='grey', xbase=0.4, ybase=0.4, scales=list(arrows=FALSE, col=1), par.settings = list(axis.line = list(col = "transparent")))
227 227
		   """
228 228
	}
229 229

  
......
260 260
				@Override
261 261
				public void run() { try {
262 262
						if (UnitsIndexMacro.this.output_histogram) {
263
							OpenSVGGraph.OpenSVGFile(file.getAbsolutePath(), selection.toString()+" Units")
263
							//OpenSVGGraph.OpenSVGFile(file.getAbsolutePath(), selection.toString()+" Units")
264
							OpenBrowser.openfile(file.getAbsolutePath())
264 265
						}
265 266
						if (UnitsIndexMacro.this.output_lexicaltable) {
266 267
							CorporaView.refreshObject(corpus)
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasProgressionMacro.groovy (revision 2094)
46 46
		String schema_property_display
47 47
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="")
48 48
		String unit_ursql
49
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
50
		int limit_distance_in_schema
51
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=false, def="")
52
		limit_cql
49
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
50
		int position_in_schema
51
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
52
		cql_limit
53 53
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
54 54
		boolean strict_inclusion
55
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
56
		int limit_distance
55
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
56
		int position
57 57
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div")
58 58
		String struct_name
59 59
@Field @Option(name="struct_prop", usage="Structure property to display", widget="String", required=true, def="n")
......
77 77

  
78 78
def CQI = CQPSearchEngine.getCqiClient()
79 79

  
80
def limit_cql_matches = null;
81
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) {
82
	def limitssubcorpus = parent.createSubcorpus(limit_cql, parent.getName().toUpperCase())
83
	limit_cql_matches = limitssubcorpus.getMatches();
80
def cql_limit_matches = null;
81
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) {
82
	def limitssubcorpus = parent.createSubcorpus(cql_limit, parent.getName().toUpperCase())
83
	cql_limit_matches = limitssubcorpus.getMatches();
84 84
	limitssubcorpus.delete();
85 85
} else {
86
	limit_cql_matches = parent.getMatches()
86
	cql_limit_matches = parent.getMatches()
87 87
}
88 88

  
89 89
def queries = []
......
121 121
	}
122 122

  
123 123
	allUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas)
124
	if ((unit_ursql != null && unit_ursql.length() > 0) || (limit_cql != null && !limit_cql.getQueryString().equals("\"\""))) {
124
	if ((unit_ursql != null && unit_ursql.length() > 0) || (cql_limit != null && !cql_limit.getQueryString().equals("\"\""))) {
125 125
		if (debug) println "Building selection of units to highlight..."
126 126
		allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql)
127 127
	}
128 128

  
129
	if (limit_distance_in_schema > 0) allHighlightedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, allSchemas, limit_distance_in_schema)
129
	if (position_in_schema > 0) allHighlightedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, allSchemas, position_in_schema)
130 130

  
131 131
	if (allUnits.size() == 0) {
132 132
		println "No unit match for '$unit_ursql' selection. Aborting"
......
150 150
		def selectedAndHighlightedUnits = new HashSet() // faster to find items
151 151
		if (allHighlightedUnits != null && allHighlightedUnits.containsKey(schema)) selectedAndHighlightedUnits.addAll(allHighlightedUnits[schema])
152 152

  
153
		if (limit_cql_matches != null) {
153
		if (cql_limit_matches != null) {
154 154
			if (debug) println "corpus matches: "+parent.getMatches()
155
			if (debug) println "filter limit_cql_matches=${limit_cql_matches} with "+selectedAndHighlightedUnits.size()+" units."
156
			selectedAndHighlightedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedAndHighlightedUnits, limit_cql_matches, strict_inclusion, limit_distance)
155
			if (debug) println "filter cql_limit_matches=${cql_limit_matches} with "+selectedAndHighlightedUnits.size()+" units."
156
			selectedAndHighlightedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedAndHighlightedUnits, cql_limit_matches, strict_inclusion, position)
157 157
			if (debug) println "selectedAndHighlightedUnits=${selectedAndHighlightedUnits.size()}"
158 158
		}
159 159

  
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsListMacro.groovy (revision 2094)
48 48
		int maximum_schema_size
49 49
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
50 50
		String unit_ursql
51
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
52
		int limit_distance_in_schema
53
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=false, def="")
54
		limit_cql
51
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
52
		int position_in_schema
53
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
54
		cql_limit
55 55
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
56 56
		boolean strict_inclusion
57
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
58
		int limit_distance
57
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
58
		int position
59 59
@Field @Option(name="output_mode", usage="If selected units properties and words are shown", widget="StringArray", metaVar="COUNT	TABULATED	FORMATED	CONCORDANCE	CQL", required=true, def="FORMATED")
60 60
		output_mode
61 61
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
......
76 76
	def analecCorpus = URSCorpora.getCorpus(mainCorpus)
77 77

  
78 78
	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
79
			unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
79
			unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
80 80

  
81 81
	def n = 1
82 82

  
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/ConcordanceToUnitMacro.groovy (revision 2094)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.urs.edit
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.txm.rcp.editors.concordances.*
8
import org.txm.searchengine.cqp.corpus.CQPCorpus
9
import org.txm.searchengine.cqp.corpus.MainCorpus
10
import org.txm.functions.concordances.*
11
import org.txm.annotation.urs.*
12
import org.txm.concordance.core.functions.Concordance
13
import org.txm.concordance.rcp.editors.ConcordanceEditor
14
import visuAnalec.elements.Unite
15

  
16
// BEGINNING OF PARAMETERS
17

  
18
@Field @Option(name="unit_type", usage="The unit type to create", widget="String", required=true, def="MENTION")
19
def unit_type
20

  
21
@Field @Option(name="create_only_if_new", usage="Create the unit if not already annotated", widget="Boolean", required=true, def="true")
22
def create_only_if_new
23

  
24
@Field @Option(name="prop", usage="prop", widget="String", required=true, def="REF")
25
def prop
26

  
27
@Field @Option(name="value", usage="default value", widget="String", required=true, def="NAME")
28
def value
29

  
30
// END OF PARAMETERS
31

  
32
// get a Concordance from 1) current Concordance editor or 2) CorporaView selection
33
Concordance concordance
34
if (editor instanceof ConcordanceEditor) {
35
	concordance = editor.getConcordance()
36
} else if (corpusViewSelection instanceof Concordance) {
37
	concordance = corpusViewSelection
38
} else {
39
	println "You must select a concordance or open a concordance result to run this macro."
40
	return false
41
}
42

  
43
if (concordance == null) {
44
	println "You must compute a concordance before."
45
	return
46
}
47

  
48
// check the analec corpus is ready
49
CQPCorpus corpus = concordance.getCorpus().getMainCorpus();
50
String name = corpus.getID()
51
if (!URSCorpora.isAnnotationStructureReady(corpus)) {
52
	println "Annotation structure is not ready."
53
	return
54
}
55

  
56
// Open the parameters input dialog box
57
if (!ParametersDialog.open(this)) return;
58

  
59
// check the corpus structure has the unit_type provided
60
def analecCorpus = URSCorpora.getCorpus(corpus)
61
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
62
	//println "The corpus structure does not contains unit with type=$unit_type"
63
	//return;
64
	analecCorpus.getStructure().ajouterType(Unite.class, unit_type);
65
}
66

  
67
if (!analecCorpus.getStructure().getNomsProps(Unite.class, unit_type).contains(prop)) {
68
	//println "The corpus structure does not contains unit with type=$unit_type"
69
	//return;
70
	analecCorpus.getStructure().ajouterProp(Unite.class, unit_type, prop)
71
}
72

  
73
if (!analecCorpus.getStructure().getValeursProp(Unite.class, unit_type, prop).contains(value)) {
74
	//println "The corpus structure does not contains unit with type=$unit_type"
75
	//return;
76
	analecCorpus.getStructure().ajouterVal(Unite.class, unit_type, prop, value)
77
}
78

  
79
// browse lines and check
80
def units = analecCorpus.getUnites(unit_type)
81
def lines = concordance.getLines()
82

  
83
int n = 0
84
for (int iLine = 0 ; iLine < lines.size() ; iLine++) {
85
	int iUnit = 0
86
	def line = lines[iLine]
87
	def m = line.getMatch()
88
	def do_create = true
89
	if (create_only_if_new && iUnit < units.size()) { // test only if create_only_if_new == true
90
		def unit = null
91
		//TODO don't iterates over all units
92
		while (iUnit < units.size() ) { // scan existing units for one covering exactly the match span
93
			if (iUnit < units.size()) {
94
				unit = units[iUnit++]
95
				if (unit.getDeb() == m.getStart() && unit.getFin() == m.getEnd()) { // skip and print the line
96
					println("skiping concordance line '"+line.keywordToString()+"' at "+line.getViewRef().toString()+" ("+unit.getDeb()+ ", "+unit.getFin()+")")
97
					do_create = false
98
					break // one matching unit is enough to skip creation; no need to scan the remaining units
99
				}
100
			}
101
		}
102
	}
103
	if (do_create) {
104
		n++
105
		def props = [:]
106
		props[prop] = value
107
		Unite u = analecCorpus.addUniteSaisie(unit_type, m.getStart(), m.getEnd(), props)
108
		//	println "$props -> "+u.getProps()
109
	}
110
}
111
println "$n $unit_type created."
112
if (n > 0) corpus.setIsModified(true);
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/ConcordanceToUnitsMacro.groovy (revision 2094)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.urs.edit
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.txm.rcp.editors.concordances.*
8
import org.txm.searchengine.cqp.corpus.CQPCorpus
9
import org.txm.searchengine.cqp.corpus.MainCorpus
10
import org.txm.functions.concordances.*
11
import org.txm.annotation.urs.*
12
import org.txm.concordance.core.functions.Concordance
13
import org.txm.concordance.rcp.editors.ConcordanceEditor
14
import visuAnalec.elements.Unite
15

  
16
// BEGINNING OF PARAMETERS
17

  
18
@Field @Option(name="unit_type", usage="The unit type to create", widget="String", required=true, def="MENTION")
19
def unit_type
20

  
21
@Field @Option(name="create_only_if_new", usage="Create the unit if not already annotated", widget="Boolean", required=true, def="true")
22
def create_only_if_new
23

  
24
@Field @Option(name="prop", usage="prop", widget="String", required=true, def="REF")
25
def prop
26

  
27
@Field @Option(name="value", usage="default value", widget="String", required=true, def="NAME")
28
def value
29

  
30
// END OF PARAMETERS
31

  
32
// Resolve the Concordance to work on: the active Concordance editor is tried
// first, then the Corpora view selection. Anything else aborts the macro.
def fromEditor = editor instanceof ConcordanceEditor
if (!fromEditor && !(corpusViewSelection instanceof Concordance)) {
	println "You must select a concordance or open a concordance result to run this macro."
	return false
}
Concordance concordance = fromEditor ? editor.getConcordance() : corpusViewSelection

// Nothing to annotate without a concordance result.
if (concordance == null) {
	println "You must compute a concordance before."
	return
}
47

  
48
// The annotations are stored against the main corpus of the concordance;
// stop early when its annotation structure is not ready.
CQPCorpus corpus = concordance.getCorpus().getMainCorpus()
String name = corpus.getID()
boolean structureReady = URSCorpora.isAnnotationStructureReady(corpus)
if (!structureReady) {
	println "Annotation structure is not ready."
	return
}

// Ask the user for the macro parameters; a refused dialog ends the macro.
boolean parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) return
58

  
59
// Make sure the annotation structure declares the requested unit type, the
// property and the value; whatever is missing is added rather than reported
// as an error.
def analecCorpus = URSCorpora.getCorpus(corpus)
def structure = analecCorpus.getStructure()

def typeDeclared = structure.getUnites().contains(unit_type)
if (!typeDeclared) {
	structure.ajouterType(Unite.class, unit_type);
}

def propDeclared = structure.getNomsProps(Unite.class, unit_type).contains(prop)
if (!propDeclared) {
	structure.ajouterProp(Unite.class, unit_type, prop)
}

def valueDeclared = structure.getValeursProp(Unite.class, unit_type, prop).contains(value)
if (!valueDeclared) {
	structure.ajouterVal(Unite.class, unit_type, prop, value)
}
78

  
79
// Create one unit of type unit_type per concordance line, spanning the match
// of the line and carrying prop=value.
// When create_only_if_new is set, a line whose match has exactly the same
// bounds as an existing unit of that type is reported and skipped.
def units = analecCorpus.getUnites(unit_type)
def lines = concordance.getLines()

int n = 0 // number of units actually created
for (def line : lines) {
	def m = line.getMatch()

	if (create_only_if_new) {
		// The first unit with identical bounds is enough to decide: the scan
		// stops there, so the line is reported once even if several such units exist.
		def existing = units.find { it.getDeb() == m.getStart() && it.getFin() == m.getEnd() }
		if (existing != null) {
			println("skiping concordance line '"+line.keywordToString()+"' at "+line.getViewRef().toString()+" ("+existing.getDeb()+ ", "+existing.getFin()+")")
			continue
		}
	}

	n++
	def props = [:]
	props[prop] = value
	analecCorpus.addUniteSaisie(unit_type, m.getStart(), m.getEnd(), props)
}
println "$n $unit_type created."
// Flag the corpus as modified only when something was created.
if (n > 0) corpus.setIsModified(true);
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/UnitsDeleteMacro.groovy (revision 2094)
43 43
		int minimum_schema_size
44 44
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
45 45
		String unit_ursql
46
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
47
		int limit_distance_in_schema
48
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
49
		limit_cql
46
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
47
		int position_in_schema
48
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
49
		cql_limit
50 50
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
51 51
		boolean strict_inclusion
52
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
53
		int limit_distance
52
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
53
		int position
54 54

  
55 55
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
56 56
		debug
......
69 69
	Structure structure = analecCorpus.getStructure()
70 70

  
71 71
	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE,
72
			unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
72
			unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
73 73
		
74 74
	def n = 0
75 75
	def nerrors = 0
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/UnitsAnnotateMacro.groovy (revision 2094)
43 43
		int minimum_schema_size
44 44
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
45 45
		String unit_ursql
46
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
47
		int limit_distance_in_schema
48
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
49
		limit_cql
46
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
47
		int position_in_schema
48
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="<div> [] expand to div")
49
		cql_limit
50 50
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
51 51
		boolean strict_inclusion
52
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
53
		int limit_distance
52
@Field @Option(name="position", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
53
		int position
54 54

  
55 55
@Field @Option(name="unit_property_to_set", usage="PROP", widget="String", required=false, def="TESTPROP")
56 56
		String unit_property_to_set
......
80 80
	analecView.initVueParDefaut()
81 81

  
82 82
	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE,
83
			unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
83
			unit_ursql, position_in_schema, cql_limit, strict_inclusion, position);
84 84
		
85 85
	println " "+selectedUnits.size()+" units to annotate..."
86 86
	def n = 0
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy (revision 2094)
93 93
 * @param minimum_schema_size
94 94
 * @param maximum_schema_size
95 95
 * @param unit_ursql
96
 * @param limit_cql
96
 * @param cql_limit
97 97
 * @param strict_inclusion
98
 * @param limit_distance
98
 * @param position
99 99
 * @return
100 100
 */
101 101
static def selectUnitsInSchema(def debug, Corpus analecCorpus, org.txm.searchengine.cqp.corpus.CQPCorpus corpus,
102 102
		String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size,
103
		String unit_ursql, Integer limit_distance_in_schema, CQLQuery limit_cql, Boolean strict_inclusion, int limit_distance) {
103
		String unit_ursql, Integer position_in_schema, CQLQuery cql_limit, Boolean strict_inclusion, int position) {
104 104
	def groupedUnits = []
105 105
	if (schema_ursql != null && schema_ursql.length() > 0 || minimum_schema_size > 1) {
106 106
		def allSchema = null;
......
114 114

  
115 115
		groupedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchema, unit_ursql)
116 116
		
117
		if (limit_distance_in_schema >= 0) groupedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, groupedUnits, limit_distance_in_schema)
117
		if (position_in_schema >= 0) groupedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, groupedUnits, position_in_schema)
118 118

  
119 119
	} else {
120 120
		groupedUnits = ["all":AnalecUtils.findAllInCorpus(debug, analecCorpus, Unite.class, unit_ursql)]
......
122 122
	if (debug >= 2) println "groupedUnits=${groupedUnits.size()}"
123 123

  
124 124
	def matches = null
125
	if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) {
126
		Subcorpus limitssubcorpus = corpus.createSubcorpus(limit_cql, corpus.getID().toUpperCase())
125
	if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) {
126
		Subcorpus limitssubcorpus = corpus.createSubcorpus(cql_limit, corpus.getID().toUpperCase())
127 127
		matches = limitssubcorpus.getMatches();
128 128
		limitssubcorpus.delete();
129 129
	} else {
......
132 132
	if (debug >= 2) println "matches=${matches}"
133 133
	def allUnits = []
134 134
	for (def k : groupedUnits.keySet()) {
135
		def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, limit_distance)
135
		def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, position)
136 136
		allUnits.addAll(selectedUnits)
137 137
	}
138 138
	if (debug >= 2) println "selectedUnits=${allUnits.size()}"
......
306 306
	return selectedUnitsPerMatch
307 307
}
308 308

  
309
static def filterUniteByInclusion(def debug, def allUnites, def matches, boolean strict_inclusion, int limit_distance) {
309
static def filterUniteByInclusion(def debug, def allUnites, def matches, boolean strict_inclusion, int position) {
310 310

  
311 311
	def selectedUnitsPerMatch = groupByMatch(debug, allUnites, matches, strict_inclusion);
312 312
	//println "selectedUnitsPerMatch size="+selectedUnitsPerMatch.size()
313 313
	def selectedUnits = []
314
	if (limit_distance != 0) {
315
		if (limit_distance > 0) limit_distance--
314
	if (position != 0) {
315
		if (position > 0) position--
316 316

  
317 317
		for (def m : selectedUnitsPerMatch.keySet()) {
318
			if (selectedUnitsPerMatch[m].size() > limit_distance && selectedUnitsPerMatch[m].size() > 0) {
318
			if (selectedUnitsPerMatch[m].size() > position && selectedUnitsPerMatch[m].size() > 0) {
319 319
				def units = selectedUnitsPerMatch[m]
320 320
				//println "$m -> "+units.collect() {it -> it.getDeb()}
321 321
				units = units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
322 322
				//println "$m -> "+units.collect() {it -> it.getDeb()}
323
				selectedUnits << units[limit_distance]
324
				if (debug >=3) println "dist select: "+units[limit_distance].getDeb()
323
				selectedUnits << units[position]
324
				if (debug >=3) println "dist select: "+units[position].getDeb()
325 325
			}
326 326
		}
327 327
	} else {
......
387 387
	int atidx = URSQL.indexOf("@");
388 388
	int equalidx = URSQL.indexOf("=");
389 389

  
390
	if (atidx >= 0 && equalidx >= 0 && atidx < equalidx) {
390
	if (atidx >= 0 && equalidx >= 0 && atidx < equalidx) { // TYPE@PROP=VALUE
391 391
		type = URSQL.substring(0, atidx)
392 392
		prop = URSQL.substring(atidx+1, equalidx)
393 393
		value = URSQL.substring(equalidx+1)
394
	} else if (atidx >= 0) {
394
	} else if (atidx >= 0) { // TYPE@PROP
395 395
		type = URSQL.substring(0, atidx)
396 396
		prop = URSQL.substring(atidx+1)
397
	} else if (equalidx >= 0) {
397
	} else if (equalidx >= 0) { // TYPE=VALUE -> not well formed
398 398
		type = URSQL.substring(0, equalidx)
399 399
		value = URSQL.substring(equalidx+1)
400
	} else {
400
	} else { // TYPE
401 401
		type = URSQL;
402 402
	}
403 403
	//	println(["'"+type+"'", "'"+prop+"'", "'"+value+"'"])
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckDuplicatesInSchemasMacro.groovy (revision 2094)
96 96
}
97 97

  
98 98
if (duplicates.size() > 0) {
99
	println "Duplicates found"
99
	println "${duplicates.size()} duplicates found"
100 100
	for (def unit : duplicates.keySet()) {
101 101
		println AnalecUtils.toString(CQI, word, unit)+" in: "
102 102
		for (Schema schema : duplicates[unit]) {
......
104 104
		}
105 105
	}
106 106
} else {
107
	println "No duplicates found in $schema_ursql units"
107
	println "No duplicates found in $schema_ursql schema units"
108 108
}
109 109

  
110 110
return duplicates
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/VerificationsMacro.groovy (revision 2094)
1
// Auteur Matthieu Quignard
2
// Date : 14 janvier 2019
3

  
4
/**********
5
Vérifications automatiques
6
1. Repère les mentions sans catégorie : CHECK > CAT
7
2. Repère les mentions sans référent : CHECK > REF
8
3. Supprime les ponctuations en début et en fin de mention : CHECK > BORNES
9
4. Supprime les prépositions autres que 'de' en début de mention : CHECK > BORNES
10
5. Supprime automatiquement toutes les mentions vides = sans aucun mot = de longueur 0
11
6. Détecter les mentions qui ont exactement les mêmes bornes  : CHECK > DOUBLON
12
7 (option). Détecter les pronoms hors mention : CHECK > NEW
13
***********/
14

  
15
package org.txm.macro.urs.democrat
16

  
17
import org.apache.commons.lang.*
18
import org.kohsuke.args4j.*
19
import groovy.transform.*
20
import org.txm.*
21
import org.txm.rcpapplication.swt.widget.parameters.*
22
import org.txm.analec.*
23
import org.txm.searchengine.cqp.*
24
import org.txm.searchengine.cqp.corpus.*
25
import visuAnalec.Message.*
26
import visuAnalec.donnees.*
27
import visuAnalec.elements.*
28
import visuAnalec.vue.*
29

  
30
// TODO: add the equivalent tags from the TreeTagger tagset
// Punctuation tags
def ponctuations = ["PONfbl", "PONfrt", "PONpxx"]
// Tags rejected as first token of a mention: punctuation and prepositions
def interditsAuDebut = ponctuations + ["PRE"]
// Tags rejected as last token of a mention: punctuation only
def interditsALaFin = ponctuations
// Pronoun tags of all kinds
def listePronoms = [
	"PROadv", "PROcar", "PROdem",
	"PROimp", "PROind", "PROint",
	"PROper", "PROpos", "PROrel"
]
37

  
38
// SCRIPT BODY

// The macro only runs on a MainCorpus selected in the Corpora view.
def selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
	println "Corpora selection is not a Corpus"
	return
}
44

  
45
// BEGINNING OF PARAMETERS
46
@Field @Option(name="unit_type", usage="Unité", widget="String", required=true, def="MENTION")
47
def unit_type
48
@Field @Option(name="pos_property_name", usage="Etiquette de morphosyntaxe", widget="String", required=true, def="frpos")
49
def pos_property_name
50
@Field @Option(name="cat_name", usage="Propriété CATEGORIE", widget="String", required=true, def="CATEGORIE")
51
def cat_name
52
@Field @Option(name="ref_name", usage="Propriété REF", widget="String", required=true, def="REF")
53
def ref_name
54
@Field @Option(name="checkPronouns", usage="Vérifier les pronoms oubliés", widget="Boolean", required=true, def="true")
55
def checkPronouns
56

  
57
// Ask the user for the macro parameters; a refused dialog ends the macro.
def parametersAccepted = ParametersDialog.open(this)
if (!parametersAccepted) return

// CQP side: selected corpus, CQi client, word property and POS property.
corpus = corpusViewSelection
CQI = Toolbox.getCqiClient()
word = corpus.getWordProperty()
posProperty = corpus.getProperty(pos_property_name)
if (posProperty == null) {
	println "Error: CQP corpus does not contains the word property with name=$pos_property_name"
	return
}

// Analec side: annotation corpus, its view and its structure.
def corpusName = corpus.getName()
analecCorpus = AnalecCorpora.getCorpus(corpusName)
vue = AnalecCorpora.getVue(corpusName)
structure = analecCorpus.getStructure()

// The structure must declare the unit type...
def unitTypeDeclared = structure.getUnites().contains(unit_type)
if (!unitTypeDeclared) {
	println "Error: corpus structure does not contains unit with name=$unit_type"
	return
}

// ...and the unit type must carry both the category and the referent properties.
for (def requiredProp : [cat_name, ref_name]) {
	if (!structure.getUniteProperties(unit_type).contains(requiredProp)) {
		println "Erreur : les unités $unit_type n'ont pas de propriété $requiredProp"
		return
	}
}

println "Détection des pronoms oubliés : $checkPronouns"
88

  
89
// Reset the CHECK property: create it when missing, otherwise drop every value
// it currently declares. "DONE" is then (re)declared in both cases.
def hasCheckProp = structure.getUniteProperties(unit_type).contains("CHECK")
if (hasCheckProp) {
	println "Nettoyage des anciennes annotations CHECK"
	// Iterate over a copy of the declared values while removing them from the structure.
	def staleValues = new HashSet(structure.getValeursProp(Unite.class, unit_type, "CHECK"))
	for (String stale : staleValues) {
		structure.supprimerVal(Unite.class, unit_type, "CHECK", stale);
	}
} else {
	analecCorpus.ajouterProp(Unite.class, unit_type, "CHECK")
}

structure.ajouterVal(Unite.class, unit_type, "CHECK", "DONE")
103

  
104
	
105

  
106
// Outcome counters reported at the end of the run
def (nModified, nIgnored, nDeleted, nAdded) = [0, 0, 0, 0]

// Units to delete once the whole list has been examined
def garbageBin = []

// One flag per corpus token: 1 = not covered by any unit, 0 = covered
def nToks = corpus.getSize()
def tokenIndex = new int[nToks]
Arrays.fill(tokenIndex, 1)

// Shared loop index, reused further down in the script
def i = 0

errors = new HashMap()

// Units ordered by start position, then by end position
def units = analecCorpus.getUnites(unit_type)
units.sort() { left, right -> left.getDeb() <=> right.getDeb() ?: left.getFin() <=> right.getFin() }

// Previously examined unit, used for duplicate detection
def lastUnit = null
125

  
126
// Corpus positions covered by a unit, as the int[] expected by CQI.cpos2Str.
def positionsOf = { Unite u ->
	int[] span = (u.getDeb() == u.getFin()) ? [u.getDeb()] : (u.getDeb()..u.getFin())
	return span
}

// Examine every unit: collect the error labels to store in its CHECK property,
// trim forbidden tokens off its bounds, and schedule broken units for deletion.
for (Unite unit : units) { // process all units
	def erreur = ""
	// Deletion is tracked separately from the error labels, so that a label
	// appended later (e.g. DOUBLON) cannot cancel the deletion.
	def toRemove = false

	// 1. Empty category
	def cat = unit.getProp( cat_name );
	if (cat == "") erreur += "CAT "

	// 2. Empty referent (more serious); does not need the POS tags
	def ref = unit.getProp( ref_name );
	if (ref == "") erreur += "REF "

	// 3. Left bound: while the first token carries a forbidden tag, shift the
	//    start of the unit one token to the right. A single-token unit cannot
	//    be shrunk and is labelled SUPPR instead.
	int[] positions = positionsOf(unit)
	def Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
	while (interditsAuDebut.contains(Mention[0])) {
		if (positions.size() == 1) {
			erreur += "SUPPR"
			break
		}
		unit.setDeb( unit.getDeb() + 1 )
		positions = positionsOf(unit)
		Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
		if (!erreur.contains("BORNESG")) erreur += "BORNESG "
	}

	// 4. Right bound: same treatment for the last token, shifting the end of
	//    the unit one token to the left.
	while (interditsALaFin.contains(Mention[ Mention.size() - 1 ])) {
		if (positions.size() == 1) {
			if (!erreur.contains("SUPPR")) erreur += "SUPPR"
			break
		}
		unit.setFin( unit.getFin() - 1 )
		positions = positionsOf(unit)
		Mention = CQI.cpos2Str(posProperty.getQualifiedName(), positions)
		if (!erreur.contains("BORNESD")) erreur += "BORNESD "
	}

	// 5. Broken units (inconsistent bounds or empty first word) are deleted;
	//    does not need the POS tags
	def forme = CQI.cpos2Str(word.getQualifiedName(), positions)[0].trim().toLowerCase()
	if (unit.getFin() < unit.getDeb()) {
		println "ERREUR GRAVE : segmentation incohérente"
		toRemove = true
	} else if ( forme.length() == 0 ) {
		println "ERREUR GRAVE : unité sans mot"
		toRemove = true
	}

	// 6. Duplicates: same bounds as the previous unit in the sorted list
	if (lastUnit != null && unit.getDeb() == lastUnit.getDeb() && unit.getFin() == lastUnit.getFin()) {
		erreur += " DOUBLON "
	}
	lastUnit = unit

	erreur = erreur.trim()
	if (toRemove) {
		garbageBin.add( unit )
		nDeleted++
	} else if (erreur != "") {
		vue.setValeurChamp(unit, "CHECK", erreur)
		nModified++
	} else {
		nIgnored++
	}

	// Mark the tokens of the unit as covered
	for (int p = unit.getDeb() ; p <= unit.getFin() ; p++) {
		tokenIndex[p] = 0
	}
}
225

  
226
// Actually delete the units collected for removal
for (def doomed : garbageBin) {
	analecCorpus.supUnite( doomed )
}
230

  
231
// 7. Add the pronouns not covered by any annotation (relies on the POS tags):
// each token still flagged as uncovered whose tag is in the pronoun list gets
// a new single-token unit marked CHECK=NEW.

if (checkPronouns) {
	println "Détection des pronoms oubliés"
	for (i = 0 ; i < nToks ; i++) {
		if (tokenIndex[i] <= 0) continue // token already covered by a unit

		Mention = CQI.cpos2Str(posProperty.getQualifiedName(), i)
		if (!listePronoms.contains(Mention[0])) continue // not a pronoun

		def props = ["CHECK": "NEW"]
		Unite u = analecCorpus.addUniteSaisie(unit_type, i, i, props)
		vue.setValeurChamp(u, "CHECK", "NEW")
		nAdded++
	}
}
249

  
250
// Flag the corpus as modified as soon as one unit was added, changed or deleted.
def nChanges = nAdded + nModified + nDeleted
if (nChanges > 0) {
	corpus.setIsModified(true);
}

// Summary of the run
println "Result:"
println "- $nModified units of type $unit_type have been modified."
println "- $nDeleted units of type $unit_type have been deleted."
println "- $nIgnored units of type $unit_type have not been modified."
println "- $nAdded forgotten pronominal units of type $unit_type have been added.\n"

// Restore the default view of the annotation corpus
def analecVue = AnalecCorpora.getVue(analecCorpus)
analecVue.retablirVueParDefaut()
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/RetoucheComplementDuNomMacro.groovy (revision 2094)
5 5
import org.kohsuke.args4j.*
6 6
import groovy.transform.*
7 7
import org.txm.*
8
import org.txm.rcp.swt.widget.parameters.*
9
import org.txm.annotation.urs.*
8
import org.txm.rcpapplication.swt.widget.parameters.*
9
import org.txm.analec.*
10 10
import org.txm.searchengine.cqp.*
11 11
import org.txm.searchengine.cqp.corpus.*
12 12
import visuAnalec.Message.*
......
16 16

  
17 17

  
18 18
/*  MACRO pour corriger une erreur d'annotation
19
 Retirer le "De" du complément du nom
20
 Algo : 
21
 POUR CHAQUE MENTION dont le premier mot est "de" (en minuscules)
22
 SI     il existe une autre MENTION dans laquelle celle-ci est totalement incluse
23
 ALORS  incrémenter d'un mot la frontière gauche de la mention
24
 Ajouter la categorie CDN.CHECK pour qu'on puisse verifier facilement le job.
25
 */
19
    Retirer le "de" ou le "d'" (forme élidée) du complément du nom
26 20

  
21
   Algo : 
22
   POUR CHAQUE MENTION dont le premier mot est "de" (en minuscules)
23
   SI     il existe une autre MENTION dans laquelle celle-ci est totalement incluse
24
   ALORS  incrémenter d'un mot la frontière gauche de la mention
25
   Ajouter la categorie CDN.CHECK pour qu'on puisse verifier facilement le job.
26
   
27
   Cette macro ne s'appuie pas sur des catégories morphosyntaxiques. 
28
   On peut donc la faire tourner avant les frpos2cattex ou fropos2cattex
29
   
30
   Auteur : Matthieu QUIGNARD
31
   Date : 18 janvier 2019
32
*/
33

  
27 34
// BEGINNING OF PARAMETERS
28
if (!(corpusViewSelection instanceof org.txm.searchengine.cqp.corpus.CQPCorpus)) {
29
	println "Selection must be a Corpus: "+corpusViewSelection
30
	return;
31
}
35

  
32 36
// Declare each parameter here
33 37
// BEGINNING OF PARAMETERS
34 38
@Field @Option(name="unit_type", usage="", widget="String", required=true, def="MENTION")
35
		def unit_type
39
def unit_type
36 40
@Field @Option(name="category_name", usage="", widget="String", required=true, def="CATEGORIE")
37
		def category_name
41
def category_name
38 42
if (!ParametersDialog.open(this)) return
39 43

  
40
	corpus = corpusViewSelection.getMainCorpus()
41
CQI = CQPSearchEngine.getCqiClient()
44
corpus = corpusViewSelection
45
CQI = Toolbox.getCqiClient()
42 46
word = corpus.getWordProperty()
43 47

  
44
analecCorpus = URSCorpora.getCorpus(corpus)
45
vue = URSCorpora.getVue(corpus)
48
analecCorpus = AnalecCorpora.getCorpus(corpus.getName())
49
vue = AnalecCorpora.getVue(corpus.getName())
46 50
structure = analecCorpus.getStructure()
47 51
if (!structure.getUnites().contains(unit_type)) { // check if the structure contains the unit_type units
48 52
	println "Error: corpus structure does not contains unit with name=$unit_type"
......
51 55

  
52 56

  
53 57
// Si la structure d'annotation ne contient pas CATEGORIE, on la crée avec ses valeurs
54
if (!structure.getUniteProperties(unit_type).contains(category_name)) {
55
	structure.ajouterProp(Unite.class, unit_type, category_name)
58
if (!structure.getUniteProperties(unit_type).contains(category_name)) { 
59
   structure.ajouterProp(Unite.class, unit_type, category_name)
56 60
}
57 61

  
58 62
def check_cat = "CDN.CHECK"
......
69 73
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
70 74

  
71 75
/* Test sur la premiere mention :
72
 def debut1 = units[1].getDeb()
73
 def fin1 = units[1].getFin()
74
 println "$debut1 - $fin1"
75
 units[1].setDeb( debut1 + 1)
76
 units[1].setFin( fin1 + 1)
77
 URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
78
 def debut2 = units[1].getDeb()
79
 def fin2 = units[1].getFin()
80
 println "$debut2 - $fin2"
81
 */
76
def debut1 = units[1].getDeb()
77
def fin1 = units[1].getFin()
78
println "$debut1 - $fin1"
79
units[1].setDeb( debut1 + 1)
80
units[1].setFin( fin1 + 1)
81
AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut()
82
def debut2 = units[1].getDeb()
83
def fin2 = units[1].getFin()
84
println "$debut2 - $fin2"
85
*/
82 86

  
83 87

  
84 88
for (Unite unit : units) { // process all units
85

  
89
	
86 90
	def debut = unit.getDeb()
87 91
	def fin = unit.getFin()
88 92
	def premierMot = CQI.cpos2Str(word.getQualifiedName(), debut)[0]
89

  
90
	if (premierMot != "de") {
93
		
94
	if ((premierMot != "de") && (premierMot !="d'")) {
91 95
		nIgnored1++
92 96
		compteur++
93 97
		continue
94 98
	} else {
95
		for (i = compteur-1; i >= 0 ; i--) {
99
		for (i = compteur-1; i>=0 ; i--) {
96 100
			def u = units[i]
97 101
			def udeb = u.getDeb()
98 102
			def ufin = u.getFin()
99 103
			if (ufin >= fin) {
100
				println "\nAVANT => Unit $compteur : $debut - $fin"
101
				if (fin > debut) unit.setDeb( debut++ )
102
				else println "not resizing"
103
				def debut2 = unit.getDeb()
104
				def fin2 = unit.getFin()
105
				URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
106
				println "APRES => Unit $compteur : $debut2 - $fin2"
107
				unit.getProps().put(category_name, check_cat)
108
				break
104
			   println "\nAVANT => Unit $compteur : $debut - $fin"
105
			   if (fin > debut) unit.setDeb( ++debut ) 
106
			   else println "not resizing"
107
			   def debut2 = unit.getDeb()
108
			   def fin2 = unit.getFin()
109
			   AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut()
110
			   println "APRES => Unit $compteur : $debut2 - $fin2"
111
			   unit.getProps().put(category_name, check_cat)
112
			   break
109 113
			}
110 114
		}
111
		if (i < 0) nIgnored2++
115
		if (i <0) nIgnored2++
112 116
		else nModified++
113 117
		compteur++
114
	}
118
	} 
115 119
}
116 120

  
117 121
println "\nResult:"
......
121 125
println "Total ($compteur)."
122 126

  
123 127
// END OF PARAMETERS
124
URSCorpora.getVue(analecCorpus).retablirVueParDefaut()
128
AnalecCorpora.getVue(analecCorpus).retablirVueParDefaut()
125 129
println "corpora selection: "+corpusViewSelection
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/SUJ-PARTINF.txt (revision 2094)
1
Macro  : SUJ-PARTINF
2
Auteur : Matthieu QUIGNARD
3
Date   : 05 Février 2019
4

  
5
Recatégorise les mentions étiquetées en SUJ.ZERO pour distinguer les cas
6
des verbes conjugués (qui restent en SUJ.ZERO) 
7
et ceux des verbes à l'infinitif ou au participe passé/présent qui, eux, vont
8
passer sous la catégorie SUJ.PARTINF
9

  
10
Cette macro est réversible.
11
Il suffit d'invoquer la macro ChercherRemplacer et substituer
12
CATEGORIE=SUJ.PARTINF
13
en 
14
CATEGORIE=SUJ.ZERO
15

  
16
NB : fonctionne avec le tagset fro (ancien français) et le tagset TreeTagger.
17
Infinitif : VERinf  VER:infi
18
Part.Pass : VERppa  VER:pper
19
Part.Pres : VERppe   VER:ppre
0 20

  
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/Verifications_alMacro.groovy (revision 2094)
1
// Auteur Matthieu Quignard
2
// Date : 14 janvier 2019
3

  
4
/**********
5
Vérifications automatiques
6
1. Repère les mentions sans catégorie : CHECK > CAT
7
2. Repère les mentions sans référent : CHECK > REF
8
3. Supprime les ponctuations en début et en fin de mention : CHECK > BORNES
9
4. Supprime les prépositions autres que 'de' en début de mention : CHECK > BORNES
10
5. Supprime automatiquement toutes les mentions vides = sans aucun mot = de longueur 0
11
6. Détecter les mentions qui ont exactement les mêmes bornes  : CHECK > DOUBLON
12
7 (option). Détecter les pronoms hors mention : CHECK > NEW
13
***********/
14

  
15
package org.txm.macro.urs.democrat
16

  
17
import org.apache.commons.lang.*
18
import org.kohsuke.args4j.*
19
import groovy.transform.*
20
import org.txm.*
21
import org.txm.rcpapplication.swt.widget.parameters.*
22
import org.txm.analec.*
23
import org.txm.searchengine.cqp.*
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff