Révision 2085

tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/exploit/NatureOfTheFirstUnitMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.macro.urs.AnalecUtils
12
import visuAnalec.elements.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.txm.searchengine.cqp.CQPSearchEngine
17
import org.apache.commons.lang.StringUtils;
18

  
19
if (!(corpusViewSelection instanceof CQPCorpus)) {
20
	println "Corpora selection is not a Corpus"
21
	return;
22
}
23

  
24
// BEGINNING OF PARAMETERS
25
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
26
String schema_ursql
27

  
28
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
29
int minimum_schema_size
30

  
31
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
32
String unit_ursql
33

  
34
@Field @Option(name="word_property", usage="", widget="String", required=false, def="CATEGORIE")
35
String word_property
36

  
37
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
38
debug
39

  
40
if (!ParametersDialog.open(this)) return;
41
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
42

  
43

  
44
CQPCorpus corpus = corpusViewSelection
45
def analecCorpus = URSCorpora.getCorpus(corpus)
46

  
47
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
48
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
49
	return;
50
}
51

  
52
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
53
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
54
	return;
55
}
56

  
57
def CQI = CQPSearchEngine.getCqiClient()
58

  
59
def prop = corpus.getProperty(word_property)
60

  
61
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
62
// Counts, over the selected schemas, the "nature" found on the first filtered unit of each schema.
// Keys are either the unit's word_property value or the joined CQP values of the covered positions.
def freqs = [:]

for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	if (units.size() == 0) continue; // no unit of this schema passes the unit_ursql filter

	// NOTE(review): the units come from getUnitesSousjacentesNonTriees() ("non triees" = unsorted);
	// confirm that filterElements returns them in text order, otherwise units[0] may not be the first unit.
	def unit = units[0]

	String forme = null;
	if (prop == null) { // word_property is the analec unit property to use
		forme = unit.getProp(word_property)
	} else { // word_property is a corpus property: read its values over the positions covered by the unit
		int[] pos = null;
		if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
		else pos = unit.getDeb()..unit.getFin()

		forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
	}

	// a missing key counts as 0
	freqs[forme] = (freqs[forme] ?: 0) + 1
}

// Print the index by decreasing frequency and keep the most frequent entry as the macro result
println "Index des natures de premier maillon :"
int max = 0;
def result = "";
for (def forme : freqs.keySet().sort() {it -> -freqs[it]}) {
	println "$forme\t"+freqs[forme]
	if (max < freqs[forme]) {
		max = freqs[forme]
		result = "$forme: "+freqs[forme]
	}
}

// value returned by the script
["result": result, "data": freqs]
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/exploit/NumberOfSchemaMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import visuAnalec.elements.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.macro.urs.AnalecUtils
15

  
16
if (!(corpusViewSelection instanceof CQPCorpus)) {
17
	println "Corpora selection is not a Corpus"
18
	return;
19
}
20

  
21
// BEGINNING OF PARAMETERS
22
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
23
String schema_ursql
24

  
25
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
26
int minimum_schema_size
27

  
28
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
29
debug
30

  
31
if (!ParametersDialog.open(this)) return;
32
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
33

  
34

  
35
CQPCorpus corpus = corpusViewSelection
36
def analecCorpus = URSCorpora.getCorpus(corpus)
37

  
38
// check Schema parameters
39
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
40
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
41
	return;
42
}
43

  
44
// Select the schemas matching schema_ursql that hold at least minimum_schema_size units (upper bound 999999)
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)

// Number of schemas kept by the selection
int nSchemas = schemas.size()
println "Nombre de chaînes de référence d'un texte : ${nSchemas}"

// value returned by the script: the count and the selected schemas
[result: nSchemas, data: schemas]
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/exploit/GrammaticalCategoryMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.macro.urs.AnalecUtils
12
import visuAnalec.elements.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.*
16
import org.txm.searchengine.cqp.corpus.*
17
import org.apache.commons.lang.StringUtils;
18

  
19
// BEGINNING OF PARAMETERS
20

  
21
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
22
String schema_ursql
23

  
24
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
25
int minimum_schema_size
26

  
27
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
28
String schema_display_property_name
29

  
30
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
31
String unit_ursql
32

  
33
@Field @Option(name="property", usage="", widget="String", required=false, def="CATEGORIE")
34
String property
35

  
36
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
37
debug
38

  
39
if (!(corpusViewSelection instanceof CQPCorpus)) {
40
	println "Corpora selection is not a Corpus"
41
	return;
42
}
43

  
44
// Open the parameters input dialog box
45
if (!ParametersDialog.open(this)) return;
46
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
47

  
48
// END OF PARAMETERS
49

  
50
// The selection guard of this script accepts any CQPCorpus, so the variable is typed accordingly
// (same declaration as in the other "exploit" macros); a MainCorpus type would reject other CQPCorpus selections.
CQPCorpus corpus = corpusViewSelection
51
def analecCorpus = URSCorpora.getCorpus(corpus)
52

  
53
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
54
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
55
	return;
56
}
57

  
58
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
59
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
60
	return;
61
}
62

  
63
// Builds, for each selected schema and for all of them together, the frequency index of the
// "nature" of the filtered units, prints the indexes and returns the most frequent global entry.
def CQI = CQPSearchEngine.getCqiClient()

def prop = corpus.getProperty(property)
if (prop == null) {
	// NOTE(review): the result of this call is not used; kept as-is in case the call matters — confirm and remove
	analecCorpus.getStructure()
}
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
def allFreqs = [:] // index over all the schemas
def n = 0 // 1-based rank of the current schema, used in the fallback title
for (def schema : schemas) {
	n++

	def freqs = [:] // index of the current schema

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	for (def unit : units) { // no need to sort units

		String forme = null;
		if (prop == null) { // property is the analec unit property to use
			forme = unit.getProp(property)
		} else { // property is a corpus property: read its values over the positions covered by the unit
			int[] pos = null;
			if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
			else pos = unit.getDeb()..unit.getFin()

			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
		}

		// a missing key counts as 0
		freqs[forme] = (freqs[forme] ?: 0) + 1
		allFreqs[forme] = (allFreqs[forme] ?: 0) + 1
	}

	if (schema_display_property_name != null) {
		println "Index des natures de $unit_ursql de '"+schema.getProp(schema_display_property_name)+"' : "
	} else {
		println "Index des natures de $schema_ursql - $n : "
	}

	// decreasing frequency, then key order
	for (def forme : freqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
		println forme.key+"\t"+forme.value
	}
}

int max = 0;
def result = "";

println "Index des natures de $schema_ursql : "
for (def forme : allFreqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
	println forme.key+"\t"+forme.value
	if (max < forme.value) {
		max = forme.value
		// forme is a Map.Entry: interpolate its key, not the entry itself (which renders as "key=value")
		result = "${forme.key}: "+forme.value
	}
}

return ["result":result, "data":allFreqs]
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitTypesNotInSchemaMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14
if (!(corpusViewSelection instanceof MainCorpus)) {
15
	println "Corpora selection is not a Corpus"
16
	return;
17
}
18

  
19
// BEGINNING OF PARAMETERS
20
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
21
String schema_type
22
if (!ParametersDialog.open(this)) return;
23

  
24
// Counts, per unit type, the units that belong to no schema of type schema_type and prints the counts.
MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);

// all the units referenced by at least one schema of the requested type
def unitesInSchema = new HashSet()
for (def schema : analecCorpus.getSchemas(schema_type)) {
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}
println "unites: "+analecCorpus.getToutesUnites().size()
println "unites in schema: "+unitesInSchema.size()

// unit type -> number of units outside of the schemas
def set = new HashMap()
for (def u : analecCorpus.getToutesUnites()) {
	if (unitesInSchema.contains(u)) continue;

	def type = u.getType()
	set[type] = (set[type] ?: 0) + 1
}

// Map.sort(Closure) hands a Map.Entry to a one-parameter closure: order by the entry value (the count)
println "unites not in schema: "+set.sort() { it.value }
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitsProgressionMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import java.util.ArrayList;
9
import java.util.List;
10

  
11
import org.apache.commons.lang.StringUtils
12
import org.jfree.chart.JFreeChart
13
import org.jfree.chart.plot.XYPlot
14
import org.kohsuke.args4j.*
15

  
16
import groovy.transform.Field
17

  
18
import org.txm.Toolbox
19
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer
20
import org.txm.progression.core.functions.Progression
21
import org.txm.rcp.swt.widget.parameters.*
22
import org.txm.annotation.urs.*
23
import org.txm.chartsengine.rcp.editors.ChartEditor
24
import org.txm.macro.urs.AnalecUtils
25
import org.txm.searchengine.cqp.AbstractCqiClient
26
import org.txm.searchengine.cqp.corpus.*
27
import org.txm.searchengine.cqp.corpus.query.Match;
28
import org.txm.searchengine.cqp.corpus.query.CQLQuery
29
import org.txm.rcp.Application
30
import org.txm.rcp.IImageKeys
31

  
32
import visuAnalec.donnees.Structure
33
import visuAnalec.elements.*
34

  
35
// Check the Corpus view selection: the macro needs a CQPCorpus.
def scriptName = this.class.getSimpleName()
def parent
def selection = []
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "** $scriptName please select a Corpus to run the macro"
	return false // stop here instead of running the macro on an unsupported selection
}
selection << corpusViewSelection
parent = corpusViewSelection
43

  
44
// BEGINNING OF PARAMETERS
45
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
46
		String schema_ursql
47
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
48
		int minimum_schema_size
49
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
50
		int maximum_schema_size
51
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
52
		String unit_ursql
53
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
54
		int limit_distance_in_schema
55
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
56
		limit_cql
57
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
58
		boolean strict_inclusion
59
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
60
		int limit_distance
61
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=true, def="CATEGORIE")
62
		String unit_property_display
63
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div")
64
		String struct_name
65
@Field @Option(name="struct_prop", usage="Structure property to display", widget="String", required=true, def="n")
66
		String struct_prop
67
@Field @Option(name="line_width", usage="line width", widget="Integer", required=true, def="1")
68
		int line_width = 2
69
@Field @Option(name="bande_width", usage="bande width", widget="Float", required=true, def="1.0f")
70
		float bande_width = 1.0f
71
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
72
		debug
73
if (!ParametersDialog.open(this)) return
74
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
75

  
76

  
77
	// CQi client used to read word values; fully qualified because this script does not import CQPSearchEngine
	def CQI = org.txm.searchengine.cqp.CQPSearchEngine.getCqiClient()
78

  
79
// Builds the progression series: one series with all the selected units and, when
// unit_property_display is set, one more series per value of that unit property.
// The three lists below are filled in parallel (same index = same series).
def queries = []
def queryResults = []
def informations = []
for (def corpus : selection) {

	mainCorpus = corpus.getMainCorpus()

	def word = mainCorpus.getWordProperty()
	def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName())

	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
			unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);

	// Registers one series: the start/end positions of the units and one description per unit
	def addSeries = { seriesQuery, seriesUnits ->
		int[] starts = new int[seriesUnits.size()]
		int[] ends = new int[seriesUnits.size()]
		def unitsinformations = []
		int n = 0
		for (Unite unite : seriesUnits) {
			starts[n] = unite.getDeb()
			ends[n] = unite.getFin()
			unitsinformations << AnalecUtils.toString(CQI, word, unite)
			n++
		}
		def queryResult = new FakeQueryResult(corpus.getID(), corpus, seriesQuery, starts, ends, null)
		queries << seriesQuery
		queryResults << queryResult
		informations << unitsinformations
	}

	// The query text of the main series is assembled from the limit CQL and the schema and unit URSQL
	def query = ""
	if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) query += limit_cql
	if (schema_ursql != null && schema_ursql.length() > 0) {
		if (query.length() > 0) query += " & "
		query += ""+schema_ursql+ " >"
	}
	if (unit_ursql != null && unit_ursql.length() > 0) query += " "+unit_ursql
	addSeries(new CQLQuery(query), selectedUnits)

	if (unit_property_display != null && unit_property_display.length() > 0) {
		// group the selected units by value of the displayed property
		def propvalues = [:]
		for (def unit : selectedUnits) {
			def v = unit.getProp(unit_property_display)
			if (v == null) v = "<null>"
			else if (v.length() == 0) v = "<empty>"

			if (!propvalues.containsKey(v)) propvalues[v] = []
			propvalues[v] << unit
		}

		// one series per property value, the value itself being used as the query text
		for (def v : propvalues.keySet().sort()) {
			addSeries(new CQLQuery(v), propvalues[v])
		}
	}
}
144

  
145
// Computes the progression of the prepared series over the selected corpus and opens its chart editor.
corpus = parent
try {
	def structuralUnit = corpus.getStructuralUnit(struct_name)
	def structuralUnitProperty = structuralUnit.getProperty(struct_prop)

	Progression progression = new Progression(corpus, queries,
			structuralUnit, structuralUnitProperty, ".*",
			true, false, false,
			line_width, false, bande_width)

	progression.stepQueries(queryResults); // new

	// stop as soon as a step fails or the user cancels
	if (!progression.stepStructuralUnits() || monitor.isCanceled()) {
		return
	}
	monitor.worked(20)
	if (!progression.stepFinalize() || monitor.isCanceled()) {
		return
	}
	monitor.worked(20)

	monitor.syncExec(new Runnable() {
				@Override
				public void run() {
					try {
						// NOTE(review): no import of SWTChartsComponentProvider is visible in this script — confirm it resolves
						ChartEditor editor = SWTChartsComponentProvider.openEditor(Application.swtComponentProvider.createProgressionChartEditorPart(IImageKeys.getImage(IImageKeys.ACTION_PROGRESSION), progression, progression.isMonochrome(), progression.isMonostyle(), progression.isDoCumulative()))
						JFreeChart chart = editor.getChart()
						def plot = chart.getXYPlot()
						// attach the per-unit descriptions to the chart renderer
						ProgressionItemSelectionRenderer renderer = plot.getRenderer();
						renderer.setAdditionalLabelInformation(informations)
					} catch(Exception e) {e.printStackTrace()}
				}
			})

} catch(Exception e) {
	e.printStackTrace()
	return false
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/SchemasListOldMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5

  
6
// STANDARD DECLARATIONS
7
package org.txm.macroprototypes.urs.misc
8

  
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.Toolbox
15
import org.txm.rcp.commands.*
16
import org.apache.commons.lang.StringUtils
17

  
18
// BEGINNING OF PARAMETERS
19
// Type of the schemas to list; the default is the CHAINE schema type, as in the other URS macros
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
String schema_type
21

  
22
@Field @Option(name="minimum_schema_size",usage="", widget="Integer", required=true, def="3")
23
int minimum_schema_size
24

  
25
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
26
String schema_property_name
27

  
28
@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
29
String schema_property_value
30

  
31
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="MENTION")
32
String unit_type
33

  
34
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
35
String unit_property_name
36

  
37
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
38
String unit_property_value
39

  
40
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word	lemma	frlemma	frolemma	#forme#	id", required=false, def="word")
41
String word_property
42

  
43
@Field @Option(name="separator", usage="", widget="String", required=true, def=", ")
44
String separator
45

  
46
@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false')
47
def buildCQL
48

  
49
if (!(corpusViewSelection instanceof MainCorpus)) {
50
	println "Corpus view selection is not a Corpus"
51
	return;
52
}
53

  
54
if (!ParametersDialog.open(this)) return;
55
// END OF PARAMETERS
56

  
57
MainCorpus corpus = corpusViewSelection
58
def analecCorpus = URSCorpora.getCorpus(corpus)
59

  
60
// check Schema parameters
61
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) {
62
	println "No schema with name=$schema_type"
63
	return;
64
} else {
65
	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
66
		// test property existance
67
		def props = analecCorpus.getStructure().getSchemaProperties(schema_type);
68
		if (!props.contains(schema_property_name)) {
69
			println "Schema $schema_type has no property named $schema_property_name"
70
			return;
71
		}
72
	}
73
}
74

  
75
// check unit parameters
76
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) {
77
	println "No unit with name=$unit_type"
78
	return;
79
} else {
80
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
81
		// test property existance
82
		def props = analecCorpus.getStructure().getUniteProperties(unit_type);
83
		if (!props.contains(unit_property_name)) {
84
			println "Unit $unit_type has no property named $unit_property_name"
85
			return;
86
		}
87
	}
88
}
89

  
90
// CQi client used to read word values; fully qualified because this script does not import CQPSearchEngine
def CQI = org.txm.searchengine.cqp.CQPSearchEngine.getCqiClient()

// Corpus property read for each unit position: "id" when a CQL query is generated, word_property otherwise.
// word_prop stays null when the corpus has no such property.
if (buildCQL) {
	word_prop = corpus.getProperty("id")
} else {
	word_prop = corpus.getProperty(word_property)
}
97

  
98
// Prints one line per schema of type schema_type: the schema properties followed by its units,
// either as word values joined with the separator or, with buildCQL, as a CQL alternative of the units.
def schemas = analecCorpus.getSchemas(schema_type)
schemas.sort() {it.getProps()}

// True when the unit passes the unit_type filter and the unit_property_name=unit_property_value filter.
// A unit without value for the tested property does not match.
def acceptUnit = { unit ->
	if (unit_type.length() > 0 && !unit.getType().equals(unit_type)) return false
	if (unit_property_name.length() > 0 && unit_property_value.length() > 0) {
		def value = unit.getProp(unit_property_name)
		if (value == null || !value.matches(unit_property_value)) return false
	}
	return true
}

for (def schema : schemas) {

	if (schema_property_name.length() > 0 && schema_property_value.length() > 0) {
		def value = schema.getProp(schema_property_name)
		if (value == null || !value.matches(schema_property_value)) {
			// ignoring this schema
			continue
		}
	}

	// the size test and the display use the same filtered units
	def units = schema.getUnitesSousjacentes().findAll(acceptUnit)
	if (units.size() < minimum_schema_size) continue

	print schema.getProps().toString()+ ": "
	def first = true
	for (def unit : units) {

		// corpus positions covered by the unit
		int[] pos = null
		if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
		else pos = (unit.getDeb()..unit.getFin())

		if (buildCQL) {
			// one CQL token per position, units separated with "|"
			def tokens = []
			for (int p : pos) {
				int[] pos2 = [p]
				tokens << "["+word_prop+"=\""+CQI.cpos2Str(word_prop.getQualifiedName(), pos2)[0]+"\"]"
			}
			if (first) { first = false } else { print "|" }
			print "("+tokens.join(" ")+")"
		} else {
			String forme = null
			if (word_prop == null) { // word_property is the analec unit property to use
				forme = unit.getProp(word_property)
			} else {
				forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough
			}

			if (first) { first = false } else { print separator }
			print forme
		}
	}
	println ""
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/EmptyPropValuesMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.apache.commons.lang.StringUtils
9
import org.kohsuke.args4j.*
10

  
11
import groovy.transform.Field
12

  
13
import org.txm.Toolbox
14
import org.txm.rcp.swt.widget.parameters.*
15
import org.txm.annotation.urs.*
16
import org.txm.macro.urs.AnalecUtils
17
import org.txm.searchengine.cqp.AbstractCqiClient
18
import org.txm.searchengine.cqp.corpus.*
19
import org.txm.searchengine.cqp.corpus.query.CQLQuery
20

  
21
import visuAnalec.donnees.Structure
22
import visuAnalec.elements.*
23

  
24
def scriptName = this.class.getSimpleName()
25

  
26
def selection = []
27
for (def s : corpusViewSelections) {
28
	if (s instanceof CQPCorpus) selection << s
29
	else if (s instanceof Partition) selection.addAll(s.getParts())
30
}
31

  
32
if (selection.size() == 0) {
33
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
34
	return false
35
}
36

  
37
// BEGINNING OF PARAMETERS
38
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
39
		String schema_ursql
40
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
41
		int minimum_schema_size
42
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
43
		String unit_ursql
44
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
45
		limit_cql
46
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
47
		boolean strict_inclusion
48
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
49
		int limit_distance
50
@Field @Option(name="debug", usage="Show internal variable content", widget="Boolean", required=true, def="false")
51
		debug
52
if (!ParametersDialog.open(this)) return
53

  
54
	// CQi client; fully qualified because this script does not import CQPSearchEngine
	def CQI = org.txm.searchengine.cqp.CQPSearchEngine.getCqiClient()
55

  
56
//corpus = corpusViewSelection
57
// For each selected corpus, prints one line per property without value (null) of the selected units:
// the corpus, the unit start and end positions, and the property name.
for (def corpus : selection) {

	mainCorpus = corpus.getMainCorpus()

	def word = mainCorpus.getWordProperty()
	def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName())

	// NOTE(review): this call passes 10 arguments while UnitsProgressionMacro passes 11 (it adds
	// limit_distance_in_schema after unit_ursql) — confirm that AnalecUtils provides this overload
	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE,
			unit_ursql, limit_cql, strict_inclusion, limit_distance);

	for (def unit : selectedUnits) {
		def props = unit.getProps();
		// names of the properties whose value is null
		def missing = props.keySet().findAll { name -> props[name] == null }
		for (def name : missing) {
			println "$corpus\t"+unit.getDeb()+"->"+unit.getFin()+"\t"+name
		}
	}
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitTypesMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14
if (!(corpusViewSelection instanceof MainCorpus)) {
15
	println "Corpora selection is not a Corpus"
16
	return;
17
}
18

  
19
// BEGINNING OF PARAMETERS
20
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="MENTION")
21
String schema_type
22

  
23
if (!ParametersDialog.open(this)) return;
24

  
25
// Reports the units referenced by several schemas of type schema_type, then prints the number
// of distinct units per unit type found in those schemas.
MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);
def map = new HashMap() // unit type -> number of distinct units
def unitesInSchema = [] // a unit appears once per schema referencing it
for (def schema : analecCorpus.getSchemas(schema_type)) {
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}

// a unit listed more than once belongs to several schemas
def counts = unitesInSchema.countBy() { it };
for (def c : counts.keySet()) {
	if (counts[c] > 1) println "ERROR UNIT IN MULTIPLE SCHEMA["+c.getDeb()+", "+c.getFin()+"]="+c.getProps()+" in "+c.getSchemas().collect() {it.getProps()}
}

// count each distinct unit once
def set = new HashSet()
set.addAll(unitesInSchema)
for (def s : set.collect { it.getType() }) {
	if (!map.containsKey(s)) map[s] = 0;
	map[s] = map[s] +1
}

// Map.sort(Closure) hands a Map.Entry to a one-parameter closure: order by the entry value (the count)
println "Unites types: "+map.sort() { it.value }
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitsCorrelationMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.apache.commons.lang.StringUtils
9
import org.txm.rcp.views.corpora.CorporaView
10
import groovy.transform.Field
11

  
12
import org.kohsuke.args4j.*
13
import org.txm.Toolbox
14
import org.txm.annotation.urs.*
15
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl
16
import org.txm.macro.urs.*
17
import org.txm.rcp.commands.*
18
import org.txm.rcp.swt.widget.parameters.*
19
import org.txm.searchengine.cqp.CQPSearchEngine
20
import org.txm.searchengine.cqp.corpus.*
21
import org.txm.searchengine.cqp.corpus.query.CQLQuery;
22
import org.txm.statsengine.r.core.RWorkspace
23

  
24
import visuAnalec.donnees.*
25
import visuAnalec.elements.*
26
import cern.colt.matrix.DoubleFactory2D
27
import cern.colt.matrix.DoubleMatrix2D
28

  
29
def scriptName = this.class.getSimpleName()
30
def parent
31
def selection = []
32
if (!(corpusViewSelection instanceof CQPCorpus)) {
33
	println "** $scriptName please select a Corpus to run the macro"
34
}
35

  
36

  
37
// BEGINNING OF PARAMETERS
38
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
39
		String schema_ursql
40
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
41
		int minimum_schema_size
42
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
43
		int maximum_schema_size
44
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
45
		String unit_ursql
46
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
47
		int limit_distance_in_schema
48
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
49
		limit_cql
50
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
51
		boolean strict_inclusion
52
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
53
		int limit_distance
54
@Field @Option(name="unit_prop1", usage="PROP1", widget="String", required=false, def="PROP1")
55
		String unit_prop1
56
@Field @Option(name="unit_prop2", usage="PROP2", widget="String", required=false, def="PROP2")
57
		String unit_prop2
58
@Field @Option(name="corr_method", usage="try them all", widget="StringArray", metaVar="pearson	spearman	kendall", required=false, def="pearson")
59
		String corr_method
60
@Field @Option(name="corr_style", usage="try them all", widget="StringArray", metaVar="circle	square	ellipse	number	shade	color	pie", required=false, def="number")
61
		String corr_style
62
@Field @Option(name="corr_layout", usage="try them all", widget="StringArray", metaVar="full	lower	upper", required=false, def="upper")
63
		String corr_layout
64
@Field @Option(name="corr_order", usage="try them all", widget="StringArray", metaVar="AOE	FPC	hclust	alphabet", required=false, def="hclust")
65
		String corr_order
66
@Field @Option(name="output_lexicaltable", usage="create or not a lexical table with the result", widget="Boolean", required=true, def="false")
67
		output_lexicaltable
68
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
69
		debug
70
if (!ParametersDialog.open(this)) return
71
	if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
72

  
73

  
74
def CQI = CQPSearchEngine.getCqiClient()
75

  
76
def correlations = [:]
77
def values1 = new HashSet()
78
def values2 = new HashSet()
79
def corpus = corpusViewSelection
80

  
81
mainCorpus = corpus.getMainCorpus()
82

  
83
def word = mainCorpus.getWordProperty()
84
def analecCorpus = URSCorpora.getCorpus(mainCorpus)
85

  
86

  
87

  
88
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
89
		unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
90

  
91
for (def unit : selectedUnits) {
92
	def value1 = unit.getProp(unit_prop1)
93
	if (value1 == null) value1 = "<null>"
94
	if (value1.length() == 0) value1 = "<empty>"
95
	def value2 = unit.getProp(unit_prop2)
96
	if (value2 == null) value2 = "<null>"
97
	if (value2.length() == 0) value2 = "<empty>"
98

  
99
	values1 << value1
100
	values2 << value2
101

  
102
	if (!correlations.containsKey(value1)) correlations[value1] = [:]
103
	def line = correlations[value1]
104
	if (!line.containsKey(value2)) line[value2] = 0
105
	line[value2] += 1
106
}
107

  
108
def matrix = new int[values1.size()][values2.size()];
109
println "\t"+values2.join("\t")
110
int i = 0;
111
for (def value1 : values1) {
112
	print value1
113
	int j = 0;
114
	for (def value2 : values2) {
115
		if (correlations[value1][value2] == null) correlations[value1][value2] = 0;
116
		print "\t"+correlations[value1][value2]
117

  
118
		matrix[i][j] = correlations[value1][value2]
119
		j++
120
	}
121
	println ""
122
	i++
123
}
124

  
125
def r = RWorkspace.getRWorkspaceInstance()
126
r.addVectorToWorkspace("corrlines", values1 as String[])
127
r.addVectorToWorkspace("corrcols", values2 as String[])
128
r.addMatrixToWorkspace("corrmatrix", matrix)
129
r.eval("rownames(corrmatrix) = corrlines")
130
r.eval("colnames(corrmatrix) = corrcols")
131

  
132
def resultsDir = new File(Toolbox.getTxmHomePath(), "results")
133
resultsDir.mkdirs()
134
file = File.createTempFile("txm_corr_pairs_", ".svg", resultsDir)
135

  
136
def title = "${corpus.getMainCorpus()}.${corpus}\n${unit_ursql}"
137
if (limit_distance > 1) title += "[${limit_distance}]."
138
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) title += "\n(${limit_cql} limits)"
139
title += "\t P1=$unit_prop1 P2=$unit_prop2" // interpolate both property names (the second '$' was missing)
140

  
141
def plotScript = """
142

  
143
r1 = cor(corrmatrix, use="complete.obs", method="$corr_method");
144
r2 = cov(corrmatrix, use="complete.obs") ;
145

  
146
corrplot(r1, type="$corr_layout", order="$corr_order", method="$corr_style")
147
"""
148

  
149

  
150
// execute R script
151
if (!output_lexicaltable) {
152
	try {
153
		r.eval("library(corrplot)")
154
		try {
155
			r.plot(file, plotScript)
156
		} catch (Exception e) {
157
			println "** Error: "+e
158
		}
159
	} catch (Exception e) {
160
		println "** The 'corrplot' R package is not installed. Start R ("+RWorkspace.getExecutablePath()+") and run 'install.packages(\"corrplot\");'."
161
	}
162
}
163
title = "$unit_prop1 $corr_method correlations"
164

  
165

  
166
def lt = null;
167
if (output_lexicaltable) {
168
	mFactory = DoubleFactory2D.dense
169
	dmatrix = mFactory.make(values1.size(), values2.size())
170
	for (int ii = 0 ; ii < values1.size() ; ii++) {
171
		for (int jj = 0 ; jj < values2.size() ; jj++) {
172
			dmatrix.set(ii, jj, matrix[ii][jj])
173
		}
174
	}
175
	if (corpusViewSelection instanceof Partition) {
176
		lt = new LexicalTableImpl(dmatrix, corpusViewSelection, corpusViewSelection.getCorpus().getProperty("word"),
177
				values1 as String[], values2 as String[])
178
		lt.setCorpus(corpusViewSelection.getCorpus());
179
		corpusViewSelection.storeResult(lt)
180
	} else {
181
		lt = new LexicalTableImpl(dmatrix, corpus.getProperty("word"),
182
				values1 as String[], values2 as String[])
183
		lt.setCorpus(corpus);
184
		corpus.storeResult(lt)
185
	}
186
}
187

  
188
monitor.syncExec(new Runnable() {
189
			@Override
190
			public void run() { try {
191

  
192
					if (output_lexicaltable) {
193
						CorporaView.refreshObject(corpus)
194
						CorporaView.expand(lt)
195
					} else {
196
						OpenBrowser.openfile(file.getAbsolutePath(), "Correlations Units")
197
					}
198
				} catch (e) { e.printStackTrace() }}
199
		})
200

  
201
return correlations
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/SchemaTypesMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14
if (!(corpusViewSelection instanceof MainCorpus)) {
15
	println "Corpora selection is not a Corpus"
16
	return;
17
}
18

  
19
MainCorpus corpus = corpusViewSelection
20
def analecCorpus = URSCorpora.getCorpus(corpus);
21

  
22
def schemas = analecCorpus.getTousSchemas()
23
def set = new HashMap()
24
for (def s : schemas.collect { it.getType() }) {
25
	if (!set.containsKey(s)) set[s] = 0;
26
	set[s] = set[s] +1
27
}
28
println "Schemas types: "+set.sort() { it.value } // Map.sort(Closure) hands each Map.Entry to the closure: order the types by their count
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/CompUnitPropertiesMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.apache.commons.lang.StringUtils;
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.Toolbox;
12
import org.txm.rcp.swt.widget.parameters.*
13
import org.txm.annotation.urs.*
14
import org.txm.searchengine.cqp.AbstractCqiClient;
15
import org.txm.searchengine.cqp.corpus.*
16
import org.txm.searchengine.cqp.CQPSearchEngine
17
import visuAnalec.donnees.Structure;
18
import visuAnalec.elements.Unite;
19

  
20
if (!(corpusViewSelection instanceof MainCorpus)) {
21
	println "Corpora selection is not a Corpus"
22
	return;
23
}
24

  
25
// BEGINNING OF PARAMETERS
26
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
27
String unit_type
28

  
29
@Field @Option(name="print_diff",usage="", widget="Boolean", required=true, def="true")
30
boolean print_diff
31

  
32
@Field @Option(name="unit_property_name1", usage="", widget="String", required=false, def="CATEGORIE")
33
String unit_property_name1
34

  
35
@Field @Option(name="unit_property_name2", usage="", widget="String", required=false, def="CATEGORIE_ORIG")
36
String unit_property_name2
37

  
38
if (!ParametersDialog.open(this)) return;
39

  
40
int n = 1;
41
int nDiff = 0;
42
MainCorpus corpus = corpusViewSelection
43
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
44
def word = corpus.getWordProperty()
45
def analecCorpus = URSCorpora.getCorpus(corpus);
46

  
47
def units = analecCorpus.getUnites(unit_type)
48
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
49
for (Unite unit : units) {
50
	int[] pos = null
51
	if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
52
	else pos = (unit.getDeb()..unit.getFin())
53
	def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")
54
	def props = unit.getProps()
55
	def v1 = props.get(unit_property_name1);
56
	def v2 = props.get(unit_property_name2);
57
	
58
	if (v1 != v2) {
59
		if (print_diff) println "$n - ${unit.getDeb()} -> ${unit.getFin()} - $props : $form"
60
		nDiff++
61
	}
62
	n++
63
}
64

  
65
if (nDiff == 0) println "$unit_property_name1 and $unit_property_name2 have the same values."
66
else println "$unit_property_name1 and $unit_property_name2 have $nDiff/${n - 1} different values." // n is the 1-based index of the next unit, so n - 1 units were actually compared
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/RelationsListMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.apache.commons.lang.StringUtils;
9
import org.apache.tools.ant.types.resources.selectors.InstanceOf;
10
import org.kohsuke.args4j.*
11

  
12
import groovy.transform.Field
13

  
14
import org.txm.Toolbox;
15
import org.txm.rcp.swt.widget.parameters.*
16
import org.txm.annotation.urs.*
17
import org.txm.searchengine.cqp.AbstractCqiClient;
18
import org.txm.searchengine.cqp.corpus.*
19
import org.txm.searchengine.cqp.CQPSearchEngine
20

  
21
import visuAnalec.donnees.Structure;
22
import visuAnalec.elements.Relation
23
import visuAnalec.elements.Unite;
24

  
25
if (!(corpusViewSelection instanceof MainCorpus)) {
26
	println "Corpora selection is not a Corpus"
27
	return;
28
}
29

  
30
// BEGINNING OF PARAMETERS
31
@Field @Option(name="relation_type",usage="", widget="String", required=true, def="ANAPHORE")
32
String relation_type
33

  
34
if (!ParametersDialog.open(this)) return;
35

  
36
MainCorpus corpus = corpusViewSelection
37
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
38
def word = corpus.getWordProperty()
39
visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(corpus);
40

  
41
int n = 1;
42
def relations = null
43
if (relation_type.length() > 0) {
44
	relations = []
45
	for (String type : analecCorpus.getStructure().getTypes(Relation.class))
46
		if (type == relation_type) relations.addAll(analecCorpus.getRelations(type)) // honour the relation_type parameter instead of collecting every type
47
} else {
48
	relations = analecCorpus.getToutesRelations()
49
}
50

  
51
for (Relation relation : relations) {
52
	def unit1 = relation.getElt1();
53
	def unit2 = relation.getElt2();
54
	def props = relation.getProps()
55
	if (unit1 instanceof Unite && unit2 instanceof Unite) {
56
		int[] pos1 = null
57
		if (unit1.getDeb() == unit1.getFin()) pos1 = [unit1.getDeb()]
58
		else pos1 = (unit1.getDeb()..unit1.getFin())
59
		def form1 = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos1), " ")
60
		
61
		int[] pos2 = null
62
		if (unit2.getDeb() == unit2.getFin()) pos2 = [unit2.getDeb()]
63
		else pos2 = (unit2.getDeb()..unit2.getFin())
64
		def form2 = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos2), " ")
65
		
66
		println "$n - $props : $form1 -> $form2"
67
	} else {
68
		println "$n - $props"
69
	}
70
	n++
71
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitTypesInSchemaMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14
if (!(corpusViewSelection instanceof MainCorpus)) {
15
	println "Corpora selection is not a Corpus"
16
	return;
17
}
18

  
19
// BEGINNING OF PARAMETERS
20
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
21
String schema_type
22

  
23
if (!ParametersDialog.open(this)) return;
24

  
25
MainCorpus corpus = corpusViewSelection
26
def analecCorpus = URSCorpora.getCorpus(corpus);
27
def map = new HashMap()
28
def unitesInSchema = []
29
def n = 0
30
for (def schema : analecCorpus.getSchemas(schema_type)) {
31
	def unites = schema.getUnitesSousjacentes()
32
	unitesInSchema.addAll(unites)
33
	n += unites.size()
34
}
35

  
36
def counts = unitesInSchema.countBy() { it };
37
for (def c : counts.keySet()) {
38
	if (counts[c] > 1) println "ERROR UNIT IN MULTIPLE SCHEMA["+c.getDeb()+", "+c.getFin()+"]="+c.getProps()+" in "+c.getSchemas().collect() {it.getProps()}
39
}
40

  
41
def set = new HashSet()
42
set.addAll(unitesInSchema)
43
for (def s : set.collect { it.getType() }) {
44
	if (!map.containsKey(s)) map[s] = 0;
45
	map[s] = map[s] +1
46
}
47

  
48
println "Unites types: "+map.sort() { it.value } // Map.sort(Closure) hands each Map.Entry to the closure: order the types by their count
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/NumberOfSchemaMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import visuAnalec.elements.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.macro.urs.AnalecUtils
15

  
16
if (!(corpusViewSelection instanceof CQPCorpus)) {
17
	println "Corpora selection is not a Corpus"
18
	return;
19
}
20

  
21
// BEGINNING OF PARAMETERS
22
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
23
String schema_ursql
24

  
25
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
26
int minimum_schema_size
27

  
28
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
29
String unit_ursql
30

  
31
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
32
debug
33

  
34
if (!ParametersDialog.open(this)) return;
35
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
36

  
37

  
38
CQPCorpus corpus = corpusViewSelection
39
def analecCorpus = URSCorpora.getCorpus(corpus)
40

  
41
// check Schema parameters
42
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
43
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
44
	return;
45
}
46

  
47
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
48
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
49
	return;
50
}
51

  
52
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
53

  
54
int nSchemas = schemas.size();
55

  
56
println "Nombre de chaînes de référence d'un texte : $nSchemas"
57

  
58
["result":nSchemas, "data":schemas]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/ReferentialDensityMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import visuAnalec.elements.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.macro.urs.AnalecUtils
15

  
16
if (!(corpusViewSelection instanceof CQPCorpus)) {
17
	println "Corpora selection is not a Corpus"
18
	return;
19
}
20

  
21
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
22
String unit_ursql
23
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
24
limit_cql
25
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
26
strict_inclusion
27
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
28
limit_distance
29

  
30
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
31
debug
32

  
33
if (!ParametersDialog.open(this)) return;
34
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
35

  
36

  
37
CQPCorpus corpus = corpusViewSelection
38
def analecCorpus = URSCorpora.getCorpus(corpus);
39

  
40
int nMots = corpus.getSize();
41

  
42
def units = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, "", 0, 0,
43
	unit_ursql, 0, limit_cql, strict_inclusion, limit_distance);
44

  
45
int nUnites = units.size();
46

  
47
coef = nMots > 0 ? (nUnites / nMots) : 0 // guard the empty-corpus case: Groovy integer division by zero throws ArithmeticException
48
println "Densité référentielle : nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
49
if (nUnites >= nMots) {
50
	println "WARNING: possible encoding error. Number of units ($nUnites) is greater than number of words ($nMots)"
51
}
52
return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/StabilityScoreMacro.groovy (revision 2085)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15
import org.txm.macro.urs.AnalecUtils
16
import visuAnalec.elements.*
17
import org.txm.searchengine.cqp.CQPSearchEngine
18

  
19
if (!(corpusViewSelection instanceof CQPCorpus)) {
20
	println "Corpora selection is not a Corpus"
21
	return;
22
}
23

  
24
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
25
String schema_ursql
26

  
27
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
28
int minimum_schema_size
29

  
30
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
31
String schema_display_property_name
32

  
33
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
34
String unit_ursql
35

  
36
@Field @Option(name="word_property", usage="", widget="String", required=false, def="word")
37
String word_property
38

  
39
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
40
debug
41

  
42
if (!ParametersDialog.open(this)) return;
43
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
44

  
45

  
46
def corpus = corpusViewSelection
47
def analecCorpus = URSCorpora.getCorpus(corpus)
48

  
49
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
50
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
51
	return;
52
}
53

  
54
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
55
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
56
	return;
57
}
58

  
59
def CQI = CQPSearchEngine.getCqiClient()
60

  
61
def prop = corpus.getProperty(word_property)
62

  
63
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
64
allFormesSet = new HashSet();
65
nUnitesGrandTotal = 0;
66
def coefs = []
67
int n = 1
68
for (def schema : schemas) {
69
	def formesSet = new HashSet(); // contient toutes les formes du CR courant
70
	nUnitesTotal = 0;
71
	
72
	def allUnites = schema.getUnitesSousjacentesNonTriees()
73

  
74
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
75
	def nUnites = units.size()
76
	for (def unit : units) {
77
	
78
		String forme =  null;
79
		if (prop == null) { // word_property is the analec unit property to use
80
			forme = unit.getProp(word_property)
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff