Révision 2162

tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy (revision 2162)
15 15
	def params = getFilterParameters(ursql)
16 16
	def typeRegexp = params[0]
17 17
	def propRegexp = params[1]
18
	println "params=$params"
18 19
	return isPropertyDefined(clazz, analecCorpus, typeRegexp, propRegexp)
19 20
}
20 21

  
......
150 151
 * filter groups elements with the elements positions
151 152
 * 
152 153
 * 
153
 * @param groups
154
 * @param groups [schema:units list]
154 155
 * @param distance 0=no selection, 1=first, 2=second, -1 last, -2 last-last
155 156
 * @return
156 157
 */
157 158
static def filterUniteByInclusionInSchema(def debug, def groups, Integer distance) {
158 159
	if (distance == 0) return groups;
159
	
160
	distance = distance-1;
160
	if (distance > 0) distance = distance-1;
161 161
	def newGroups = [:]
162 162
	for (def k : groups.keySet()) {
163
		newGroups[k] = [];
164 163
		def group = groups[k]
165
		if (Math.abs(distance) < group.size())
166
			newGroups[k] << group[distance]
164
		if (group.size() == 0) {
165
			newGroups[k] = group;
166
			continue;
167
		}
168
		def indexes = null
169
		if (distance > 0) {
170
			indexes = 0..Math.min(distance, group.size())
171
		} else {
172
			indexes = Math.max(distance, -group.size())..-1
173
		}
174
		newGroups[k] = group[indexes];
167 175
	}
168 176
	return newGroups
169 177
}
......
216 224
static def findAllInCorpus(def debug, def analecCorpus, Class elemClazz, String URSQL) {
217 225
	def params = getFilterParameters(URSQL)
218 226
	if (debug >= 2) println "PARAMS=$params"
219
	return findAllInCorpus(debug, analecCorpus, elemClazz, params[0], params[1], params[2])
227
	return findAllInCorpus(debug, analecCorpus, elemClazz, params[0], params[1], params[2], params[3])
220 228
}
221 229

  
222
static def findAllInCorpus(def debug, Corpus analecCorpus, Class elemClazz, String typeRegex, String propName, String valueRegex) {
230
static def findAllInCorpus(def debug, Corpus analecCorpus, Class elemClazz, String typeRegex, String propName, boolean eq, String valueRegex) {
223 231
	def allElements = null;
224 232

  
225 233
	if (elemClazz != null) {
......
236 244
		allElements.addAll(analecCorpus.getTousSchemas())
237 245
	}
238 246

  
239
	return filterElements(debug, allElements, typeRegex, propName, valueRegex);
247
	return filterElements(debug, allElements, typeRegex, propName, eq, valueRegex);
240 248
}
241 249

  
242 250
static def filterBySize(def elements, Integer minimum_schema_size, Integer maximum_schema_size) {
......
340 348

  
341 349
static def findAllUnitesInElements(def debug, def elements, String URSQL) {
342 350
	def params = getFilterParameters(URSQL)
343
	return findAllUnitesInElements(debug, elements, params[0], params[1], params[2])
351
	return findAllUnitesInElements(debug, elements, params[0], params[1], params[2], params[3])
344 352
}
345 353

  
346
static def findAllUnitesInElements(def debug, def elements, String typeRegex, String propName, String valueRegex) {
354
static def findAllUnitesInElements(def debug, def elements, String typeRegex, String propName, boolean eq, String valueRegex) {
347 355
	def allElements = []
348 356

  
349 357
	for (Element element : elements) {
350
		allElements.addAll(filterElements(debug, element.getUnitesSousjacentes(), typeRegex, propName, valueRegex));
358
		allElements.addAll(filterElements(debug, element.getUnitesSousjacentes(), typeRegex, propName, eq, valueRegex));
351 359
	}
352 360

  
353 361
	return allElements;
......
372 380
 */
373 381
static def groupAllUnitesInElements(def debug, def elements, String URSQL) {
374 382
	def params = getFilterParameters(URSQL)
375
	return groupAllUnitesInElements(debug, elements, params[0], params[1], params[2])
383
	return groupAllUnitesInElements(debug, elements, params[0], params[1], params[2], params[3])
376 384
}
377 385

  
378
static def groupAllUnitesInElements(def debug, def elements, String typeRegex, String propName, String valueRegex) {
386
static def groupAllUnitesInElements(def debug, def elements, String typeRegex, String propName, boolean eq, String valueRegex) {
379 387
	def allElements = [:]
380 388

  
381 389
	for (Element element : elements) {
382
		allElements[element] = filterElements(debug, element.getUnitesSousjacentes(), typeRegex, propName, valueRegex);
390
		allElements[element] = filterElements(debug, element.getUnitesSousjacentes(), typeRegex, propName, eq, valueRegex);
383 391
	}
384 392

  
385 393
	return allElements;
......
392 400
	String value = "";
393 401

  
394 402
	int atidx = URSQL.indexOf("@");
395
	int equalidx = URSQL.indexOf("=");
396

  
397
	if (atidx >= 0 && equalidx >= 0 && atidx < equalidx) { // TYPE@PROP=VALUE
403
	int equal_start_idx = URSQL.indexOf("=");
404
	int equal_end_idx = equal_start_idx
405
	int differentidx = URSQL.indexOf("!=");
406
	boolean eq = differentidx < 0 || differentidx != equal_start_idx-1
407
	if (!eq) {
408
		equal_start_idx--
409
	}
410
	 
411
	if (atidx >= 0 && equal_start_idx >= 0 && atidx < equal_start_idx) { // TYPE@PROP=VALUE
398 412
		type = URSQL.substring(0, atidx)
399
		prop = URSQL.substring(atidx+1, equalidx)
400
		value = URSQL.substring(equalidx+1)
413
		prop = URSQL.substring(atidx+1, equal_start_idx)
414
		value = URSQL.substring(equal_end_idx+1)
401 415
	} else if (atidx >= 0) { // TYPE@PROP
402 416
		type = URSQL.substring(0, atidx)
403 417
		prop = URSQL.substring(atidx+1)
404
	} else if (equalidx >= 0) { // TYPE=VALUE -> not well formed
405
		type = URSQL.substring(0, equalidx)
406
		value = URSQL.substring(equalidx+1)
418
	} else if (equal_start_idx >= 0) { // TYPE=VALUE -> not well formed
419
		type = URSQL.substring(0, equal_start_idx)
420
		value = URSQL.substring(equal_end_idx+1)
407 421
	} else { // TYPE
408 422
		type = URSQL;
409 423
	}
410 424
	//	println(["'"+type+"'", "'"+prop+"'", "'"+value+"'"])
411 425

  
412
	return [type, prop, value]
426
	return [type, prop, eq, value]
413 427
}
414 428

  
415 429
static def filterElements(def debug, def allElements, String URSQL) {
416 430
	def params = getFilterParameters(URSQL)
417
	return filterElements(debug, allElements, params[0], params[1], params[2])
431
	return filterElements(debug, allElements, params[0], params[1], params[2], params[3])
418 432
}
419 433

  
420
static def filterElements(def debug, def allElements, String typeRegex, String propName, String valueRegex) {
434
static def filterElements(def debug, def allElements, String typeRegex, String propName, boolean eq, String valueRegex) {
421 435
	if (debug >= 2) println "filtering "+allElements.size()+" elements with typeRegex='$typeRegex' propName='$propName' and valueRegex='$valueRegex'"
422 436
	if (typeRegex != null && typeRegex.length() > 0) {
423 437
		def filteredElements = []
......
438 452
			def matcher = /$valueRegex/
439 453
			for (Element element : allElements) {
440 454
				def value = element.getProp(propName)
441
				if (value != null && value ==~ matcher) {
442
					filteredElements << element
455
				if (value ==~ matcher) {
456
					if (eq)	filteredElements << element
457
				} else {
458
					if (!eq) filteredElements << element
443 459
				}
444 460
			}
445 461
		} else { // select only elements with the prop
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasSummaryMacro.groovy (revision 2162)
62 62
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
63 63
		return;
64 64
	}
65
	
66
	errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
67
	if (errors.size() > 0) {
68
		println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
69
		return;
70
	}
71
	
65
		
72 66
	for (def type : analecCorpus.getStructure().getUnites())
73 67
		props.addAll(analecCorpus.getStructure().getUniteProperties(type));
74 68

  
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsIndexMacro.groovy (revision 2162)
173 173
else println "Index de la propriété $unit_property_display des unités $unit_ursql[$position_in_matches] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
174 174
println "$unit_property_display\t"+selection.join("\t")
175 175

  
176
int total_freq = 0
176 177
keys.eachWithIndex { prop_val, i ->
177 178
	String line = ""
178 179
	if (prop_val.size() > 0) {
......
188 189

  
189 190
		matrix.set(i, j, freq)
190 191
		line += "\t"+freq
191

  
192
		total_freq += freq
192 193
		if (min < freq) min = freq
193 194
	}
194 195
	if (min >= output_fmin)
195 196
		println "$line"
196 197
}
197

  
198
println "\nTOTAL=$total_freq"
198 199
props = keys
199 200

  
200 201
def r = RWorkspace.getRWorkspaceInstance()
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasCoverage.groovy (revision 2162)
1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.urs.exploit
6

  
7
import groovy.transform.Field
8

  
9
import org.jfree.chart.JFreeChart
10
import org.jfree.chart.editor.ChartEditor
11
import org.kohsuke.args4j.*
12
import org.txm.Toolbox
13
import org.txm.annotation.urs.*
14
import org.txm.chartsengine.core.preferences.ChartsEnginePreferences
15
import org.txm.chartsengine.r.core.RChartsEngine
16
import org.txm.macro.urs.AnalecUtils
17
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer
18
import org.txm.progression.core.functions.Progression
19
import org.txm.rcp.Application
20
import org.txm.rcp.IImageKeys
21
import org.txm.rcp.swt.widget.parameters.*
22
import org.txm.searchengine.cqp.CQPSearchEngine
23
import org.txm.searchengine.cqp.corpus.*
24
import org.txm.searchengine.cqp.corpus.query.CQLQuery
25
import org.txm.chartsengine.rcp.*;
26
import visuAnalec.elements.*
27

  
28
def scriptName = this.class.getSimpleName()
29

  
30
def selection = []
31
for (def s : corpusViewSelections) {
32
	if (s instanceof CQPCorpus) selection << s
33
	else if (s instanceof Partition) selection.addAll(s.getParts())
34
}
35

  
36
if (selection.size() == 0) {
37
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
38
	return false
39
} else {
40
	for (def c : selection) c.compute(false)
41
}
42

  
43
// BEGINNING OF PARAMETERS
44
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
45
		String schema_ursql
46
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
47
		int minimum_schema_size
48
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
49
		String schema_property_display
50
		
51
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="")
52
		String unit_ursql
53
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
54
		boolean strict_inclusion
55

  
56
@Field @Option(name="structure_properties", usage="name_prpperty,name2_property2,name3_property", widget="String", required=true, def="div_n,p_n")
57
		String structure_properties
58
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
59
		debug
60
if (!ParametersDialog.open(this)) return
61

  
62
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
63
if (structure_properties.length() == 0) return;
64

  
65
def CQI = CQPSearchEngine.getCqiClient()
66

  
67
for (def corpus : selection) {
68

  
69
	def properties = [corpus.getStructuralUnit("text").getProperty("id")]
70
	
71
	// build structural unit properties list from the "structure_properties" parameter
72
	for (def name : structure_properties.split(",")) {
73
		name = name.trim()
74
		String[] split = name.split("_", 2);
75
		if (split.length == 2) {
76
			def su = corpus.getStructuralUnit(split[0])
77
			if (su == null) {
78
				println "No Structure for name=$name"
79
			} else {
80
				def p = su.getProperty(split[1])
81
				if (p == null) {
82
					println "No Structure property for name=$name"
83
				} else {
84
					properties << p
85
				}
86
			}
87
		} else {
88
			println "Wrong structural unit name format: $name"
89
		}
90
	}
91
	def cql_limit_matches = corpus.getMatches()
92
	
93
	def word = corpus.getWordProperty()
94
	def analecCorpus = URSCorpora.getCorpus(corpus)
95
	AnalecUtils.defineProperty(Schema.class, analecCorpus, schema_ursql, "LOCALISATION")
96
	URSCorpora.getVue(corpus).initVueParDefaut()
97

  
98
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
99
	if (errors.size() > 0) {
100
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
101
		return;
102
	}
103
	
104
	errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
105
	if (errors.size() > 0) {
106
		println "** The $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
107
		return;
108
	}
109
	
110
	if (schema_property_display.length() > 0) {
111
		errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display).size()
112
		if (errors > 0) {
113
			println "Error: some Schema types don't contain the $schema_property_display property: $errors"
114
			return
115
		}
116
	}
117

  
118
	def allUnits = [:]
119
	def allHighlightedUnits = [:]
120
	def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, 0, 99999999, strict_inclusion)
121

  
122
	if (allSchemas.size() == 0) {
123
		println "No schema match for '$schema_ursql' selection. Aborting"
124
		return
125
	}
126

  
127
	if (debug) println "Building selection of units to highlight..."
128
	allHighlightedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas, unit_ursql)
129

  
130
	if (allHighlightedUnits.size() == 0) {
131
		println "No schema&unit match for '$schema_ursql' and '$unit_ursql' selection. Aborting"
132
		return
133
	}
134
	if (debug) println "allHighlightedUnits=${allHighlightedUnits.size()}"
135

  
136
	println "annotating ${allSchemas.size()} schemas..." 
137
	for (Schema schema : allSchemas) {
138
		if (debug) println "	schema="+schema.getProps()
139
		def selectedUnits = allHighlightedUnits[schema]
140

  
141
		if (selectedUnits.size() == 0) continue;
142
		selectedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedUnits, corpus.getMatches(), strict_inclusion, 0)
143

  
144
		if (selectedUnits.size() == 0) continue;
145

  
146
		if (selectedUnits.size() < minimum_schema_size) {
147
			schema.getProps()["LOCALISATION"] = "N/A"
148
			if (debug) println schema.getProp(schema_property_display)+" -> N/A"
149
			continue; // no need to go further, process next selected element of corpora view
150
		}
151

  
152

  
153
		if (debug) println "		selectedUnits=${selectedUnits.size()}"
154

  
155
		// get all positions for the selected units
156
		def positions = new TreeSet()
157
		for (def unit : selectedUnits) {
158
			positions.addAll(unit.getDeb()..unit.getFin())
159
		}
160
		int[] positions_array = positions
161
		if (debug) println "		positions=${positions.size()}"
162
		
163
		// test each property indexes
164
		for (def property : properties) {
165
			if (debug) println "		testing $property..."
166
			def idx = CQI.cpos2Struc(property.getQualifiedName(), positions_array)
167
			def hash = new HashSet()
168
			hash.addAll(idx)
169
			if (debug) println "		hash=$hash"
170
			if (hash.size() == 1) { // the units are only in ONE structure
171
				int[] struct = hash
172
				def ref = CQI.struc2Str(property.getQualifiedName(), struct)[0]
173
				println schema.getProp(schema_property_display)+" (${selectedUnits.size} units) -> "+property.getFullName()+" "+ref
174
				schema.getProps()["LOCALISATION"] = property.getFullName()+" "+ref
175
			} 
176
		}
177
	}
178
}
179

  
180
//println ""+queries.size()+" selected schemas: "+queries
181

  

Formats disponibles : Unified diff