Revision 2167 tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy

AnalecUtils.groovy (revision 2167)
11 11

  
12 12

  
13 13
static def isPropertyDefined(Class clazz, Corpus analecCorpus, String ursql) {
14
	if (ursql == null || ursql.length() == 0) return new HashSet() 
14
	if (ursql == null || ursql.length() == 0) return new HashSet()
15 15
	def params = getFilterParameters(ursql)
16 16
	def typeRegexp = params[0]
17 17
	def propRegexp = params[1]
18
	println "params=$params"
19 18
	return isPropertyDefined(clazz, analecCorpus, typeRegexp, propRegexp)
20 19
}
21 20

  
......
56 55
	if (maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE;
57 56
	if (minimum_schema_size < 0) minimum_schema_size = 0;
58 57
	def allSchemas = []
59
	
58

  
60 59
	if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql)
61 60
	else allSchemas = analecCorpus.getTousSchemas()
62 61

  
63 62
	if (debug >= 2) println "allSchemas=${allSchemas.size()}"
64 63
	allSchemas = AnalecUtils.filterBySize(allSchemas, minimum_schema_size, maximum_schema_size);
65
	
64

  
66 65
	return allSchemas
67 66
}
68 67

  
69 68
static def selectSchemasInCorpus(def debug, Corpus analecCorpus, org.txm.searchengine.cqp.corpus.CQPCorpus corpus,
70
	String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, boolean strictInclusion) {
71
		
69
		String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, boolean strictInclusion) {
70

  
72 71
	if (maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE;
73 72
	if (minimum_schema_size < 0) minimum_schema_size = 0;
74
		
73

  
75 74
	def allSchemas = []
76 75
	if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql)
77 76
	else allSchemas = analecCorpus.getTousSchemas()
78
	
77

  
79 78
	def selectedSchemas = []
80 79
	for (Schema schema : allSchemas) {
81 80
		def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, schema.getUnitesSousjacentes(), corpus.getMatches(), strictInclusion, 0)
82
		
81

  
83 82
		if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size ) {
84 83
			selectedSchemas << schema
85 84
		}
86 85
	}
87
	
86

  
88 87
	return selectedSchemas
89 88
}
90 89

  
......
107 106
		String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size,
108 107
		String unit_ursql, Integer position_in_schema, CQLQuery cql_limit, Boolean strict_inclusion, int position_in_matches) {
109 108
	def groupedUnits = []
110
	if (schema_ursql != null && schema_ursql.length() > 0 || minimum_schema_size > 1) {
109
	if (schema_ursql != null && schema_ursql.length() > 0) {
111 110
		def allSchema = null;
112 111

  
113 112
		if (schema_ursql != null && schema_ursql.length() > 0) allSchema = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql)
114 113
		else allSchema = analecCorpus.getTousSchemas()
115 114
		if (debug >= 2) println "allSchema=${allSchema.size()}"
116 115

  
117
		allSchema = AnalecUtils.filterBySize(allSchema, minimum_schema_size, maximum_schema_size);
118
		if (debug >= 2) println "allSchema=${allSchema.size()}"
119

  
120 116
		groupedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchema, unit_ursql)
117
		if (debug >= 2) println "groupedUnits=${groupedUnits.size()}"
121 118
		
122
		if (position_in_schema >= 0) groupedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, groupedUnits, position_in_schema)
119
		groupedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, groupedUnits, position_in_schema)
120
		if (debug >= 2) println "groupedUnits=${groupedUnits.size()}"
123 121

  
122

  
124 123
	} else {
125 124
		groupedUnits = ["all":AnalecUtils.findAllInCorpus(debug, analecCorpus, Unite.class, unit_ursql)]
126 125
	}
......
139 138
	def allUnits = []
140 139
	for (def k : groupedUnits.keySet()) {
141 140
		def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, position_in_matches)
142
		allUnits.addAll(selectedUnits)
141

  
142
		if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size ) {
143
			allUnits.addAll(selectedUnits)
144
		} else {
145

  
146
		}
143 147
	}
144 148
	if (debug >= 2) println "selectedUnits=${allUnits.size()}"
145 149

  
146 150
	Collections.sort(allUnits)
147
	
151

  
148 152
	return allUnits
149 153
}
150 154
/**
......
156 160
 * @return
157 161
 */
158 162
static def filterUniteByInclusionInSchema(def debug, def groups, Integer distance) {
163
	println "dist=$distance"
159 164
	if (distance == 0) return groups;
160 165
	if (distance > 0) distance = distance-1;
161 166
	def newGroups = [:]
......
166 171
			continue;
167 172
		}
168 173
		def indexes = null
169
		if (distance > 0) {
174
		if (distance >= 0) {
170 175
			indexes = 0..Math.min(distance, group.size())
171 176
		} else {
172 177
			indexes = Math.max(distance, -group.size())..-1
173 178
		}
179

  
174 180
		newGroups[k] = group[indexes];
175 181
	}
176 182
	return newGroups
......
253 259

  
254 260
	def filteredElements = []
255 261
	for (Element e : elements) {
256
		Unite[] units = e.getUnitesSousjacentes();
257
		int size = units.length;
258
		if (size < minimum_schema_size) continue;
259
		if (size > maximum_schema_size) continue;
260
		filteredElements << e;
262
		Unite[] selectedUnits = e.getUnitesSousjacentes();
263
		int size = selectedUnits.length;
264
		if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size ) {
265
			filteredElements << e
266
		}
261 267
	}
262 268
	return filteredElements
263 269
}
......
288 294
	selectedUnitsPerMatch[iCurrentMatch] = selectedUnits
289 295

  
290 296
	while (iCurrentMatch < matchesSize && iCurrentUnit < unitsSize) {
291
		if (debug >= 2) println "** M $iCurrentMatch < $matchesSize && U $iCurrentUnit < $unitsSize"
297
		if (debug >= 3) println "** M $iCurrentMatch < $matchesSize && U $iCurrentUnit < $unitsSize"
292 298

  
293 299
		Unite unit = allUnites[iCurrentUnit]
294 300
		Match match = matches[iCurrentMatch]
295 301
		if (debug >= 3) println ""+unit.getDeb()+"->"+unit.getFin()+"	"+match.getStart()+"->"+match.getEnd()
296 302
		if (unit.getFin() < match.getStart()) {
297 303
			if (debug >= 3) "println next unit"
298
			
299
			iCurrentUnit++
304

  
305
				iCurrentUnit++
300 306
		} else if (unit.getDeb() > match.getEnd()) {
301 307
			if (debug >= 3) "println next match"
302
			
303
			iCurrentMatch++
308

  
309
				iCurrentMatch++
304 310
			selectedUnits = []
305 311
			selectedUnitsPerMatch[iCurrentMatch] = selectedUnits
306 312
		} else {
......
407 413
	if (!eq) {
408 414
		equal_start_idx--
409 415
	}
410
	 
416

  
411 417
	if (atidx >= 0 && equal_start_idx >= 0 && atidx < equal_start_idx) { // TYPE@PROP=VALUE
412 418
		type = URSQL.substring(0, atidx)
413 419
		prop = URSQL.substring(atidx+1, equal_start_idx)
......
432 438
}
433 439

  
434 440
static def filterElements(def debug, def allElements, String typeRegex, String propName, boolean eq, String valueRegex) {
435
	if (debug >= 2) println "filtering "+allElements.size()+" elements with typeRegex='$typeRegex' propName='$propName' and valueRegex='$valueRegex'"
441
	if (debug >= 3) println "filtering "+allElements.size()+" elements with typeRegex='$typeRegex' propName='$propName' and valueRegex='$valueRegex'"
436 442
	if (typeRegex != null && typeRegex.length() > 0) {
437 443
		def filteredElements = []
438 444
		def matcher = /$typeRegex/
......
444 450

  
445 451
		allElements = filteredElements;
446 452
	}
447
	if (debug >= 2) println " type step result: "+allElements.size()
453
	if (debug >= 3) println " type step result: "+allElements.size()
448 454

  
449 455
	if (propName != null && propName.length() > 0) {
450 456
		def filteredElements = []
......
468 474

  
469 475
		allElements = filteredElements;
470 476
	}
471
	if (debug >= 2) println " prop&value step result: "+allElements.size()
477
	if (debug >= 3) println " prop&value step result: "+allElements.size()
472 478
	return allElements;
473 479
}
474 480

  

Also available in: Unified diff