Revision 2167 tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy
AnalecUtils.groovy (revision 2167) | ||
---|---|---|
11 | 11 |
|
12 | 12 |
|
13 | 13 |
static def isPropertyDefined(Class clazz, Corpus analecCorpus, String ursql) { |
14 |
if (ursql == null || ursql.length() == 0) return new HashSet()
|
|
14 |
if (ursql == null || ursql.length() == 0) return new HashSet() |
|
15 | 15 |
def params = getFilterParameters(ursql) |
16 | 16 |
def typeRegexp = params[0] |
17 | 17 |
def propRegexp = params[1] |
18 |
println "params=$params" |
|
19 | 18 |
return isPropertyDefined(clazz, analecCorpus, typeRegexp, propRegexp) |
20 | 19 |
} |
21 | 20 |
|
... | ... | |
56 | 55 |
if (maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE; |
57 | 56 |
if (minimum_schema_size < 0) minimum_schema_size = 0; |
58 | 57 |
def allSchemas = [] |
59 |
|
|
58 |
|
|
60 | 59 |
if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql) |
61 | 60 |
else allSchemas = analecCorpus.getTousSchemas() |
62 | 61 |
|
63 | 62 |
if (debug >= 2) println "allSchemas=${allSchemas.size()}" |
64 | 63 |
allSchemas = AnalecUtils.filterBySize(allSchemas, minimum_schema_size, maximum_schema_size); |
65 |
|
|
64 |
|
|
66 | 65 |
return allSchemas |
67 | 66 |
} |
68 | 67 |
|
69 | 68 |
static def selectSchemasInCorpus(def debug, Corpus analecCorpus, org.txm.searchengine.cqp.corpus.CQPCorpus corpus, |
70 |
String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, boolean strictInclusion) { |
|
71 |
|
|
69 |
String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, boolean strictInclusion) {
|
|
70 |
|
|
72 | 71 |
if (maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE; |
73 | 72 |
if (minimum_schema_size < 0) minimum_schema_size = 0; |
74 |
|
|
73 |
|
|
75 | 74 |
def allSchemas = [] |
76 | 75 |
if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql) |
77 | 76 |
else allSchemas = analecCorpus.getTousSchemas() |
78 |
|
|
77 |
|
|
79 | 78 |
def selectedSchemas = [] |
80 | 79 |
for (Schema schema : allSchemas) { |
81 | 80 |
def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, schema.getUnitesSousjacentes(), corpus.getMatches(), strictInclusion, 0) |
82 |
|
|
81 |
|
|
83 | 82 |
if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size ) { |
84 | 83 |
selectedSchemas << schema |
85 | 84 |
} |
86 | 85 |
} |
87 |
|
|
86 |
|
|
88 | 87 |
return selectedSchemas |
89 | 88 |
} |
90 | 89 |
|
... | ... | |
107 | 106 |
String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, |
108 | 107 |
String unit_ursql, Integer position_in_schema, CQLQuery cql_limit, Boolean strict_inclusion, int position_in_matches) { |
109 | 108 |
def groupedUnits = [] |
110 |
if (schema_ursql != null && schema_ursql.length() > 0 || minimum_schema_size > 1) {
|
|
109 |
if (schema_ursql != null && schema_ursql.length() > 0) { |
|
111 | 110 |
def allSchema = null; |
112 | 111 |
|
113 | 112 |
if (schema_ursql != null && schema_ursql.length() > 0) allSchema = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql) |
114 | 113 |
else allSchema = analecCorpus.getTousSchemas() |
115 | 114 |
if (debug >= 2) println "allSchema=${allSchema.size()}" |
116 | 115 |
|
117 |
allSchema = AnalecUtils.filterBySize(allSchema, minimum_schema_size, maximum_schema_size); |
|
118 |
if (debug >= 2) println "allSchema=${allSchema.size()}" |
|
119 |
|
|
120 | 116 |
groupedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchema, unit_ursql) |
117 |
if (debug >= 2) println "groupedUnits=${groupedUnits.size()}" |
|
121 | 118 |
|
122 |
if (position_in_schema >= 0) groupedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, groupedUnits, position_in_schema) |
|
119 |
groupedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, groupedUnits, position_in_schema) |
|
120 |
if (debug >= 2) println "groupedUnits=${groupedUnits.size()}" |
|
123 | 121 |
|
122 |
|
|
124 | 123 |
} else { |
125 | 124 |
groupedUnits = ["all":AnalecUtils.findAllInCorpus(debug, analecCorpus, Unite.class, unit_ursql)] |
126 | 125 |
} |
... | ... | |
139 | 138 |
def allUnits = [] |
140 | 139 |
for (def k : groupedUnits.keySet()) { |
141 | 140 |
def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, position_in_matches) |
142 |
allUnits.addAll(selectedUnits) |
|
141 |
|
|
142 |
if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size ) { |
|
143 |
allUnits.addAll(selectedUnits) |
|
144 |
} else { |
|
145 |
|
|
146 |
} |
|
143 | 147 |
} |
144 | 148 |
if (debug >= 2) println "selectedUnits=${allUnits.size()}" |
145 | 149 |
|
146 | 150 |
Collections.sort(allUnits) |
147 |
|
|
151 |
|
|
148 | 152 |
return allUnits |
149 | 153 |
} |
150 | 154 |
/** |
... | ... | |
156 | 160 |
* @return |
157 | 161 |
*/ |
158 | 162 |
static def filterUniteByInclusionInSchema(def debug, def groups, Integer distance) { |
163 |
println "dist=$distance" |
|
159 | 164 |
if (distance == 0) return groups; |
160 | 165 |
if (distance > 0) distance = distance-1; |
161 | 166 |
def newGroups = [:] |
... | ... | |
166 | 171 |
continue; |
167 | 172 |
} |
168 | 173 |
def indexes = null |
169 |
if (distance > 0) { |
|
174 |
if (distance >= 0) {
|
|
170 | 175 |
indexes = 0..Math.min(distance, group.size()) |
171 | 176 |
} else { |
172 | 177 |
indexes = Math.max(distance, -group.size())..-1 |
173 | 178 |
} |
179 |
|
|
174 | 180 |
newGroups[k] = group[indexes]; |
175 | 181 |
} |
176 | 182 |
return newGroups |
... | ... | |
253 | 259 |
|
254 | 260 |
def filteredElements = [] |
255 | 261 |
for (Element e : elements) { |
256 |
Unite[] units = e.getUnitesSousjacentes();
|
|
257 |
int size = units.length;
|
|
258 |
if (size < minimum_schema_size) continue;
|
|
259 |
if (size > maximum_schema_size) continue;
|
|
260 |
filteredElements << e;
|
|
262 |
Unite[] selectedUnits = e.getUnitesSousjacentes();
|
|
263 |
int size = selectedUnits.length;
|
|
264 |
if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size ) {
|
|
265 |
filteredElements << e
|
|
266 |
}
|
|
261 | 267 |
} |
262 | 268 |
return filteredElements |
263 | 269 |
} |
... | ... | |
288 | 294 |
selectedUnitsPerMatch[iCurrentMatch] = selectedUnits |
289 | 295 |
|
290 | 296 |
while (iCurrentMatch < matchesSize && iCurrentUnit < unitsSize) { |
291 |
if (debug >= 2) println "** M $iCurrentMatch < $matchesSize && U $iCurrentUnit < $unitsSize"
|
|
297 |
if (debug >= 3) println "** M $iCurrentMatch < $matchesSize && U $iCurrentUnit < $unitsSize"
|
|
292 | 298 |
|
293 | 299 |
Unite unit = allUnites[iCurrentUnit] |
294 | 300 |
Match match = matches[iCurrentMatch] |
295 | 301 |
if (debug >= 3) println ""+unit.getDeb()+"->"+unit.getFin()+" "+match.getStart()+"->"+match.getEnd() |
296 | 302 |
if (unit.getFin() < match.getStart()) { |
297 | 303 |
if (debug >= 3) "println next unit" |
298 |
|
|
299 |
iCurrentUnit++ |
|
304 |
|
|
305 |
iCurrentUnit++
|
|
300 | 306 |
} else if (unit.getDeb() > match.getEnd()) { |
301 | 307 |
if (debug >= 3) "println next match" |
302 |
|
|
303 |
iCurrentMatch++ |
|
308 |
|
|
309 |
iCurrentMatch++
|
|
304 | 310 |
selectedUnits = [] |
305 | 311 |
selectedUnitsPerMatch[iCurrentMatch] = selectedUnits |
306 | 312 |
} else { |
... | ... | |
407 | 413 |
if (!eq) { |
408 | 414 |
equal_start_idx-- |
409 | 415 |
} |
410 |
|
|
416 |
|
|
411 | 417 |
if (atidx >= 0 && equal_start_idx >= 0 && atidx < equal_start_idx) { // TYPE@PROP=VALUE |
412 | 418 |
type = URSQL.substring(0, atidx) |
413 | 419 |
prop = URSQL.substring(atidx+1, equal_start_idx) |
... | ... | |
432 | 438 |
} |
433 | 439 |
|
434 | 440 |
static def filterElements(def debug, def allElements, String typeRegex, String propName, boolean eq, String valueRegex) { |
435 |
if (debug >= 2) println "filtering "+allElements.size()+" elements with typeRegex='$typeRegex' propName='$propName' and valueRegex='$valueRegex'"
|
|
441 |
if (debug >= 3) println "filtering "+allElements.size()+" elements with typeRegex='$typeRegex' propName='$propName' and valueRegex='$valueRegex'"
|
|
436 | 442 |
if (typeRegex != null && typeRegex.length() > 0) { |
437 | 443 |
def filteredElements = [] |
438 | 444 |
def matcher = /$typeRegex/ |
... | ... | |
444 | 450 |
|
445 | 451 |
allElements = filteredElements; |
446 | 452 |
} |
447 |
if (debug >= 2) println " type step result: "+allElements.size()
|
|
453 |
if (debug >= 3) println " type step result: "+allElements.size()
|
|
448 | 454 |
|
449 | 455 |
if (propName != null && propName.length() > 0) { |
450 | 456 |
def filteredElements = [] |
... | ... | |
468 | 474 |
|
469 | 475 |
allElements = filteredElements; |
470 | 476 |
} |
471 |
if (debug >= 2) println " prop&value step result: "+allElements.size()
|
|
477 |
if (debug >= 3) println " prop&value step result: "+allElements.size()
|
|
472 | 478 |
return allElements; |
473 | 479 |
} |
474 | 480 |
|
Also available in: Unified diff