Révision 2164

tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasCoverage.groovy (revision 2164)
1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.urs.exploit
6

  
7
import groovy.transform.Field
8

  
9
import org.jfree.chart.JFreeChart
10
import org.jfree.chart.editor.ChartEditor
11
import org.kohsuke.args4j.*
12
import org.txm.Toolbox
13
import org.txm.annotation.urs.*
14
import org.txm.chartsengine.core.preferences.ChartsEnginePreferences
15
import org.txm.chartsengine.r.core.RChartsEngine
16
import org.txm.macro.urs.AnalecUtils
17
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer
18
import org.txm.progression.core.functions.Progression
19
import org.txm.rcp.Application
20
import org.txm.rcp.IImageKeys
21
import org.txm.rcp.swt.widget.parameters.*
22
import org.txm.searchengine.cqp.CQPSearchEngine
23
import org.txm.searchengine.cqp.corpus.*
24
import org.txm.searchengine.cqp.corpus.query.CQLQuery
25
import org.txm.chartsengine.rcp.*;
26
import visuAnalec.elements.*
27

  
28
// Simple class name of this script, used as a prefix in user messages
def scriptName = this.class.getSimpleName()

// Gather the corpora to process from the Corpus view selection:
// a corpus is taken as is, a partition contributes all of its parts,
// any other kind of selected object is ignored.
def selection = []
for (selected in corpusViewSelections) {
	switch (selected) {
		case CQPCorpus:
			selection << selected
			break
		case Partition:
			selection.addAll(selected.getParts())
			break
	}
}

// Nothing usable selected: tell the user and stop the macro
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every selected corpus before going further
for (corpusToCompute in selection) {
	corpusToCompute.compute(false)
}
42

  
43
// BEGINNING OF PARAMETERS
// URSQL expression selecting the schemas to process
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
		String schema_ursql
// Schemas with fewer selected units than this value are marked "N/A"
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
		int minimum_schema_size
// Schema property whose value is printed to identify a schema in the output
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
		String schema_property_display

// URSQL expression selecting the units of each schema (optional)
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="")
		String unit_ursql
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
		boolean strict_inclusion

// Comma-separated list of STRUCTURE_PROPERTY names (e.g. "div_n,p_n") to test
@Field @Option(name="structure_properties", usage="name_property,name2_property2,name3_property", widget="String", required=true, def="div_n,p_n")
		String structure_properties
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
		debug
// Stop when the parameters dialog does not return true
if (!ParametersDialog.open(this)) return

// Convert the debug label chosen in the dialog into a numeric level;
// any other value is left untouched.
def debugLevels = ["OFF": 0, "ON": 1, "ALL": 2, "REALLY ALL": 3]
if (debugLevels.containsKey(debug)) debug = debugLevels[debug]

// Without any structure property there is nothing to test: say so instead of exiting silently
if (structure_properties == null || structure_properties.trim().length() == 0) {
	println "** $scriptName: the structure_properties parameter is empty. Aborting"
	return;
}
64

  
65
// CQi client used below to map corpus positions to structure indexes (cpos2Struc)
// and to read structure property values (struc2Str)
def CQI = CQPSearchEngine.getCqiClient()

// For each selected corpus: select the schemas, then for each schema test whether
// all positions of its units fall in ONE structure of each tested property and,
// if so, store "<property> <value>" in the schema's LOCALISATION property.
// NOTE: every "return" in this loop stops the whole macro, remaining corpora included.
for (def corpus : selection) {

	// the "id" property of the "text" structure is always tested first
	def properties = [corpus.getStructuralUnit("text").getProperty("id")]

	// build structural unit properties list from the "structure_properties" parameter
	for (def name : structure_properties.split(",")) {
		name = name.trim()
		// split on the first "_" only: STRUCTURE_PROPERTY
		String[] split = name.split("_", 2);
		if (split.length == 2) {
			def su = corpus.getStructuralUnit(split[0])
			if (su == null) {
				println "No Structure for name=$name"
			} else {
				def p = su.getProperty(split[1])
				if (p == null) {
					println "No Structure property for name=$name"
				} else {
					properties << p
				}
			}
		} else {
			println "Wrong structural unit name format: $name"
		}
	}

	// corpus matches, fetched once and reused for every schema of this corpus
	def cql_limit_matches = corpus.getMatches()

	def analecCorpus = URSCorpora.getCorpus(corpus)
	AnalecUtils.defineProperty(Schema.class, analecCorpus, schema_ursql, "LOCALISATION")
	URSCorpora.getVue(corpus).initVueParDefaut()

	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
	if (errors.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
	if (errors.size() > 0) {
		println "** The $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	if (schema_property_display.length() > 0) {
		// keep the returned errors themselves (not only their count) so the message can show them
		errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display)
		if (errors.size() > 0) {
			println "Error: some Schema types don't contain the $schema_property_display property: $errors"
			return
		}
	}

	def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, 0, 99999999, strict_inclusion)

	if (allSchemas.size() == 0) {
		println "No schema match for '$schema_ursql' selection. Aborting"
		return
	}

	if (debug) println "Building selection of units to highlight..."
	def allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql)

	if (allHighlightedUnits.size() == 0) {
		println "No schema&unit match for '$schema_ursql' and '$unit_ursql' selection. Aborting"
		return
	}
	if (debug) println "allHighlightedUnits=${allHighlightedUnits.size()}"

	println "annotating ${allSchemas.size()} schemas..." 
	for (Schema schema : allSchemas) {
		if (debug) println "	schema="+schema.getProps()
		def selectedUnits = allHighlightedUnits[schema]

		// the lookup yields null when the schema has no entry: skip it like an empty one
		if (selectedUnits == null || selectedUnits.size() == 0) continue;
		selectedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedUnits, cql_limit_matches, strict_inclusion, 0)

		if (selectedUnits.size() == 0) continue;

		if (selectedUnits.size() < minimum_schema_size) {
			schema.getProps()["LOCALISATION"] = "N/A"
			if (debug) println schema.getProp(schema_property_display)+" -> N/A"
			continue; // too few units: no need to go further, process the next schema
		}

		if (debug) println "		selectedUnits=${selectedUnits.size()}"

		// get all positions for the selected units (sorted, without duplicates)
		def positions = new TreeSet()
		for (def unit : selectedUnits) {
			positions.addAll(unit.getDeb()..unit.getFin())
		}
		int[] positions_array = positions
		if (debug) println "		positions=${positions.size()}"

		// test each property indexes; when several properties match,
		// the last one of the list is the one kept in LOCALISATION
		for (def property : properties) {
			if (debug) println "		testing $property..."
			def idx = CQI.cpos2Struc(property.getQualifiedName(), positions_array)
			def hash = new HashSet()
			hash.addAll(idx)
			if (debug) println "		hash=$hash"
			if (hash.size() == 1) { // the units are only in ONE structure
				int[] struct = hash
				def ref = CQI.struc2Str(property.getQualifiedName(), struct)[0]
				// size() must be called as a method: the property form is resolved against the elements of a collection
				println schema.getProp(schema_property_display)+" (${selectedUnits.size()} units) -> "+property.getFullName()+" "+ref
				schema.getProps()["LOCALISATION"] = property.getFullName()+" "+ref
			}
		}
	}
}
179

  
180
//println ""+queries.size()+" selected schemas: "+queries
181

  
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasCoverageMacro.groovy (revision 2164)
1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.urs.exploit
6

  
7
import groovy.transform.Field
8

  
9
import org.jfree.chart.JFreeChart
10
import org.jfree.chart.editor.ChartEditor
11
import org.kohsuke.args4j.*
12
import org.txm.Toolbox
13
import org.txm.annotation.urs.*
14
import org.txm.chartsengine.core.preferences.ChartsEnginePreferences
15
import org.txm.chartsengine.r.core.RChartsEngine
16
import org.txm.macro.urs.AnalecUtils
17
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer
18
import org.txm.progression.core.functions.Progression
19
import org.txm.rcp.Application
20
import org.txm.rcp.IImageKeys
21
import org.txm.rcp.swt.widget.parameters.*
22
import org.txm.searchengine.cqp.CQPSearchEngine
23
import org.txm.searchengine.cqp.corpus.*
24
import org.txm.searchengine.cqp.corpus.query.CQLQuery
25
import org.txm.chartsengine.rcp.*;
26
import visuAnalec.elements.*
27

  
28
// Simple class name of this script, used as a prefix in user messages
def scriptName = this.class.getSimpleName()

// Gather the corpora to process from the Corpus view selection:
// a corpus is taken as is, a partition contributes all of its parts,
// any other kind of selected object is ignored.
def selection = []
for (selected in corpusViewSelections) {
	switch (selected) {
		case CQPCorpus:
			selection << selected
			break
		case Partition:
			selection.addAll(selected.getParts())
			break
	}
}

// Nothing usable selected: tell the user and stop the macro
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every selected corpus before going further
for (corpusToCompute in selection) {
	corpusToCompute.compute(false)
}
42

  
43
// BEGINNING OF PARAMETERS
// URSQL expression selecting the schemas to process
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
		String schema_ursql
// Schemas with fewer selected units than this value are marked "N/A"
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
		int minimum_schema_size
// Schema property whose value is printed to identify a schema in the output
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
		String schema_property_display

// URSQL expression selecting the units of each schema (optional)
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="")
		String unit_ursql
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
		boolean strict_inclusion

// Comma-separated list of STRUCTURE_PROPERTY names (e.g. "div_n,p_n") to test
@Field @Option(name="structure_properties", usage="name_property,name2_property2,name3_property", widget="String", required=true, def="div_n,p_n")
		String structure_properties
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
		debug
// Stop when the parameters dialog does not return true
if (!ParametersDialog.open(this)) return

// Convert the debug label chosen in the dialog into a numeric level;
// any other value is left untouched.
def debugLevels = ["OFF": 0, "ON": 1, "ALL": 2, "REALLY ALL": 3]
if (debugLevels.containsKey(debug)) debug = debugLevels[debug]

// Without any structure property there is nothing to test: say so instead of exiting silently
if (structure_properties == null || structure_properties.trim().length() == 0) {
	println "** $scriptName: the structure_properties parameter is empty. Aborting"
	return;
}
64

  
65
// CQi client used below to map corpus positions to structure indexes (cpos2Struc)
// and to read structure property values (struc2Str)
def CQI = CQPSearchEngine.getCqiClient()

// For each selected corpus: select the schemas, then for each schema test whether
// all positions of its units fall in ONE structure of each tested property and,
// if so, store "<property> <value>" in the schema's LOCALISATION property.
// NOTE: every "return" in this loop stops the whole macro, remaining corpora included.
for (def corpus : selection) {

	// the "id" property of the "text" structure is always tested first
	def properties = [corpus.getStructuralUnit("text").getProperty("id")]

	// build structural unit properties list from the "structure_properties" parameter
	for (def name : structure_properties.split(",")) {
		name = name.trim()
		// split on the first "_" only: STRUCTURE_PROPERTY
		String[] split = name.split("_", 2);
		if (split.length == 2) {
			def su = corpus.getStructuralUnit(split[0])
			if (su == null) {
				println "No Structure for name=$name"
			} else {
				def p = su.getProperty(split[1])
				if (p == null) {
					println "No Structure property for name=$name"
				} else {
					properties << p
				}
			}
		} else {
			println "Wrong structural unit name format: $name"
		}
	}

	// corpus matches, fetched once and reused for every schema of this corpus
	def cql_limit_matches = corpus.getMatches()

	def analecCorpus = URSCorpora.getCorpus(corpus)
	AnalecUtils.defineProperty(Schema.class, analecCorpus, schema_ursql, "LOCALISATION")
	URSCorpora.getVue(corpus).initVueParDefaut()

	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
	if (errors.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
	if (errors.size() > 0) {
		println "** The $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	if (schema_property_display.length() > 0) {
		// keep the returned errors themselves (not only their count) so the message can show them
		errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display)
		if (errors.size() > 0) {
			println "Error: some Schema types don't contain the $schema_property_display property: $errors"
			return
		}
	}

	def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, 0, 99999999, strict_inclusion)

	if (allSchemas.size() == 0) {
		println "No schema match for '$schema_ursql' selection. Aborting"
		return
	}

	if (debug) println "Building selection of units to highlight..."
	def allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql)

	if (allHighlightedUnits.size() == 0) {
		println "No schema&unit match for '$schema_ursql' and '$unit_ursql' selection. Aborting"
		return
	}
	if (debug) println "allHighlightedUnits=${allHighlightedUnits.size()}"

	println "annotating ${allSchemas.size()} schemas..." 
	for (Schema schema : allSchemas) {
		if (debug) println "	schema="+schema.getProps()
		def selectedUnits = allHighlightedUnits[schema]

		// the lookup yields null when the schema has no entry: skip it like an empty one
		if (selectedUnits == null || selectedUnits.size() == 0) continue;
		selectedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedUnits, cql_limit_matches, strict_inclusion, 0)

		if (selectedUnits.size() == 0) continue;

		if (selectedUnits.size() < minimum_schema_size) {
			schema.getProps()["LOCALISATION"] = "N/A"
			if (debug) println schema.getProp(schema_property_display)+" -> N/A"
			continue; // too few units: no need to go further, process the next schema
		}

		if (debug) println "		selectedUnits=${selectedUnits.size()}"

		// get all positions for the selected units (sorted, without duplicates)
		def positions = new TreeSet()
		for (def unit : selectedUnits) {
			positions.addAll(unit.getDeb()..unit.getFin())
		}
		int[] positions_array = positions
		if (debug) println "		positions=${positions.size()}"

		// test each property indexes; when several properties match,
		// the last one of the list is the one kept in LOCALISATION
		for (def property : properties) {
			if (debug) println "		testing $property..."
			def idx = CQI.cpos2Struc(property.getQualifiedName(), positions_array)
			def hash = new HashSet()
			hash.addAll(idx)
			if (debug) println "		hash=$hash"
			if (hash.size() == 1) { // the units are only in ONE structure
				int[] struct = hash
				def ref = CQI.struc2Str(property.getQualifiedName(), struct)[0]
				// size() must be called as a method: the property form is resolved against the elements of a collection
				println schema.getProp(schema_property_display)+" (${selectedUnits.size()} units) -> "+property.getFullName()+" "+ref
				schema.getProps()["LOCALISATION"] = property.getFullName()+" "+ref
			}
		}
	}
}
179

  
180
//println ""+queries.size()+" selected schemas: "+queries
181

  

Formats disponibles : Unified diff