root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / AnalecUtils.groovy @ 2167
History | View | Annotate | Download (18.4 kB)
1 | 1217 | mdecorde | package org.txm.macro.urs
|
---|---|---|---|
2 | 671 | mdecorde | |
3 | 671 | mdecorde | import org.txm.searchengine.cqp.corpus.Property |
4 | 671 | mdecorde | import org.txm.searchengine.cqp.corpus.Subcorpus |
5 | 671 | mdecorde | import org.txm.searchengine.cqp.corpus.query.Match |
6 | 1061 | mdecorde | import org.txm.searchengine.cqp.corpus.query.CQLQuery |
7 | 671 | mdecorde | import visuAnalec.donnees.* |
8 | 671 | mdecorde | import visuAnalec.elements.* |
9 | 671 | mdecorde | |
10 | 671 | mdecorde | import org.apache.commons.lang.StringUtils |
11 | 671 | mdecorde | |
12 | 671 | mdecorde | |
/**
 * Checks, from a URSQL expression, which types of the corpus structure lack a matching property.
 *
 * The expression is split by getFilterParameters(); its type part and property part are then
 * handed, as regular expressions, to the four-argument overload.
 *
 * @param clazz the element class whose types are inspected
 * @param analecCorpus the corpus providing the structure
 * @param ursql the URSQL expression; null or empty means nothing to check
 * @return an empty set when ursql is null or empty, otherwise the result of the four-argument overload
 */
static def isPropertyDefined(Class clazz, Corpus analecCorpus, String ursql) {
	boolean nothingToCheck = (ursql == null) || ursql.length() == 0
	if (nothingToCheck) {
		return new HashSet()
	}

	def parts = getFilterParameters(ursql)
	return isPropertyDefined(clazz, analecCorpus, parts[0], parts[1])
}
20 | 671 | mdecorde | |
/**
 * Collects the types of the structure that match typeRegexp but declare no property
 * whose name matches propRegexp.
 *
 * @param clazz the element class whose types are inspected
 * @param analecCorpus the corpus providing the structure
 * @param typeRegexp regular expression a type must fully match to be tested
 * @param propRegexp regular expression tested against each property name; null or empty disables the check
 * @return the set of tested types for which no property name matched (empty when propRegexp is null or empty)
 */
static def isPropertyDefined(Class clazz, Corpus analecCorpus, String typeRegexp, String propRegexp) {
	def typesWithoutProperty = new HashSet()
	if (propRegexp == null || propRegexp.length() == 0) {
		return typesWithoutProperty
	}

	Structure structure = analecCorpus.getStructure()
	for (def type : structure.getTypes(clazz)) {
		if (type.matches(typeRegexp)) { // only types matching typeRegexp are tested
			boolean found = false
			for (def propertyName : structure.getNomsProps(clazz, type)) {
				if (propertyName.matches(propRegexp)) {
					found = true
					break // one matching property is enough
				}
			}
			if (!found) {
				typesWithoutProperty << type
			}
		}
	}

	return typesWithoutProperty
}
40 | 671 | mdecorde | |
/**
 * Adds newProperty to every type of the structure selected by the type part of a URSQL expression.
 *
 * Only the type part returned by getFilterParameters() is used; a type that already
 * lists newProperty among its property names is left untouched.
 *
 * @param clazz the element class whose types are updated
 * @param analecCorpus the corpus providing the structure
 * @param ursql the URSQL expression whose type part is used as a regular expression
 * @param newProperty the property name to add
 */
static def defineProperty(Class clazz, Corpus analecCorpus, String ursql, String newProperty) {
	def typeRegexp = getFilterParameters(ursql)[0]
	Structure structure = analecCorpus.getStructure()

	for (def type : structure.getTypes(clazz)) {
		if (type.matches(typeRegexp)) { // only types matching typeRegexp are updated
			boolean alreadyDeclared = structure.getNomsProps(clazz, type).contains(newProperty)
			if (!alreadyDeclared) {
				structure.ajouterProp(clazz, type, newProperty)
			}
		}
	}
}
53 | 671 | mdecorde | |
/**
 * Selects the schemas of the corpus matching a URSQL expression and a size interval.
 *
 * @param debug verbosity level; the number of candidate schemas is printed when it is >= 2
 * @param analecCorpus the corpus providing the schemas
 * @param schema_ursql URSQL expression selecting the schemas; null or empty selects all of them
 * @param minimum_schema_size lower bound given to filterBySize(); a negative value is replaced by 0
 * @param maximum_schema_size upper bound given to filterBySize(); a value <= 0 is replaced by Integer.MAX_VALUE
 * @return the schemas kept by filterBySize()
 */
static def selectSchemas(def debug, Corpus analecCorpus, String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size) {
	Integer upperBound = (maximum_schema_size <= 0) ? Integer.MAX_VALUE : maximum_schema_size
	Integer lowerBound = (minimum_schema_size < 0) ? 0 : minimum_schema_size

	def candidates
	boolean hasQuery = schema_ursql != null && schema_ursql.length() > 0
	if (hasQuery) {
		candidates = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql)
	} else {
		candidates = analecCorpus.getTousSchemas()
	}

	if (debug >= 2) println "allSchemas=${candidates.size()}"

	return AnalecUtils.filterBySize(candidates, lowerBound, upperBound)
}
67 | 671 | mdecorde | |
/**
 * Selects the schemas whose number of units located in the corpus matches lies in a size interval.
 *
 * For each candidate schema, its underlying units are filtered with filterUniteByInclusion()
 * against corpus.getMatches(); the schema is kept when the number of remaining units is
 * between the two bounds (inclusive).
 *
 * @param debug verbosity level forwarded to the helpers
 * @param analecCorpus the corpus providing the schemas
 * @param corpus the CQP corpus whose matches delimit the units
 * @param schema_ursql URSQL expression selecting the schemas; null or empty selects all of them
 * @param minimum_schema_size lower bound; a negative value is replaced by 0
 * @param maximum_schema_size upper bound; a value <= 0 is replaced by Integer.MAX_VALUE
 * @param strictInclusion forwarded to filterUniteByInclusion()
 * @return the list of kept schemas
 */
static def selectSchemasInCorpus(def debug, Corpus analecCorpus, org.txm.searchengine.cqp.corpus.CQPCorpus corpus,
		String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, boolean strictInclusion) {

	Integer upperBound = (maximum_schema_size <= 0) ? Integer.MAX_VALUE : maximum_schema_size
	Integer lowerBound = (minimum_schema_size < 0) ? 0 : minimum_schema_size

	def candidates
	if (schema_ursql == null || schema_ursql.length() == 0) {
		candidates = analecCorpus.getTousSchemas()
	} else {
		candidates = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql)
	}

	def keptSchemas = []
	for (Schema schema : candidates) {
		def unitsInCorpus = AnalecUtils.filterUniteByInclusion(debug, schema.getUnitesSousjacentes(), corpus.getMatches(), strictInclusion, 0)
		int count = unitsInCorpus.size()
		if (count < lowerBound || count > upperBound) {
			continue
		}
		keptSchemas << schema
	}

	return keptSchemas
}
89 | 671 | mdecorde | |
/**
 * Selects units, optionally through a selection of schemas, then restricts them to the
 * corpus matches (or to the matches of a CQL limit query).
 *
 * When schema_ursql is given, the units are grouped per schema with groupAllUnitesInElements()
 * and each group is reduced with filterUniteByInclusionInSchema(). Otherwise all the units
 * selected by unit_ursql form a single group named "all". Each group is then filtered with
 * filterUniteByInclusion(); a group contributes its units only when the number of remaining
 * units lies between the two size bounds (inclusive).
 *
 * @param debug verbosity level; intermediate sizes are printed when it is >= 2
 * @param analecCorpus the corpus providing schemas and units
 * @param corpus the CQP corpus whose matches delimit the units
 * @param schema_ursql URSQL expression selecting the schemas; null or empty means no schema selection
 * @param minimum_schema_size lower bound on the group size; null or negative is replaced by 0
 * @param maximum_schema_size upper bound on the group size; null or <= 0 is replaced by Integer.MAX_VALUE
 * @param unit_ursql URSQL expression selecting the units
 * @param position_in_schema forwarded to filterUniteByInclusionInSchema() (only used with a schema selection)
 * @param cql_limit optional CQL query; when set and different from "", its matches replace the corpus matches
 * @param strict_inclusion forwarded to filterUniteByInclusion()
 * @param position_in_matches forwarded to filterUniteByInclusion()
 * @return the selected units, sorted with Collections.sort()
 */
static def selectUnitsInSchema(def debug, Corpus analecCorpus, org.txm.searchengine.cqp.corpus.CQPCorpus corpus,
		String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size,
		String unit_ursql, Integer position_in_schema, CQLQuery cql_limit, Boolean strict_inclusion, int position_in_matches) {

	// same normalisation as selectSchemas() and filterBySize(): without it a maximum of 0
	// (meaning "no maximum" everywhere else) would reject every non-empty group
	if (maximum_schema_size == null || maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE
	if (minimum_schema_size == null || minimum_schema_size < 0) minimum_schema_size = 0

	def groupedUnits = [:]
	if (schema_ursql != null && schema_ursql.length() > 0) {
		def allSchema = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql)
		if (debug >= 2) println "allSchema=${allSchema.size()}"

		groupedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchema, unit_ursql)
		if (debug >= 2) println "groupedUnits=${groupedUnits.size()}"

		groupedUnits = AnalecUtils.filterUniteByInclusionInSchema(debug, groupedUnits, position_in_schema)
		if (debug >= 2) println "groupedUnits=${groupedUnits.size()}"
	} else {
		groupedUnits = ["all":AnalecUtils.findAllInCorpus(debug, analecCorpus, Unite.class, unit_ursql)]
	}
	if (debug >= 2) println "groupedUnits=${groupedUnits.size()}"

	// limit units to corpus or cql_limit matches
	def matches = null
	if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) {
		Subcorpus limitssubcorpus = corpus.createSubcorpus(cql_limit, corpus.getID().toUpperCase())
		try {
			matches = limitssubcorpus.getMatches()
		} finally {
			// the subcorpus is only a temporary holder for the matches: always drop it
			limitssubcorpus.delete()
		}
	} else {
		matches = corpus.getMatches()
	}
	if (debug >= 2) println "matches=${matches}"

	def allUnits = []
	for (def k : groupedUnits.keySet()) {
		def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, position_in_matches)

		if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size) {
			allUnits.addAll(selectedUnits)
		}
	}
	if (debug >= 2) println "selectedUnits=${allUnits.size()}"

	Collections.sort(allUnits)

	return allUnits
}
/**
 * Reduces each group of elements to its first or last elements.
 *
 * @param debug verbosity level; the requested distance is printed when it is >= 2
 * @param groups map of [schema:units list]
 * @param distance 0 (or null) = no selection, N > 0 = keep the N first elements,
 *        N < 0 = keep the |N| last elements; a request larger than a group keeps the whole group
 * @return groups itself when distance is 0 or null, otherwise a new map with the same keys
 *         and the reduced groups (empty groups are kept as they are)
 */
static def filterUniteByInclusionInSchema(def debug, def groups, Integer distance) {
	if (debug >= 2) println "dist=$distance"
	if (distance == null || distance == 0) return groups

	def newGroups = [:]
	for (def k : groups.keySet()) {
		def group = groups[k]
		int size = group.size()
		if (size == 0) {
			newGroups[k] = group
			continue
		}

		def indexes = null
		if (distance > 0) {
			// last index is clamped to size-1: indexing up to size would overflow the group
			indexes = 0..Math.min(distance - 1, size - 1)
		} else {
			// negative indexes count from the end of the group
			indexes = Math.max(distance, -size)..-1
		}

		newGroups[k] = group[indexes]
	}
	return newGroups
}
184 | 671 | mdecorde | |
/**
 * Builds the arrays of start and end positions of a collection of units.
 *
 * @param selectedUnits the units; each one is read through getDeb() and getFin()
 * @return a three-element list: [starts as int[], ends as int[], null]
 */
static def getStartsEndsTargetsArrays(def selectedUnits) {
	int count = selectedUnits.size()
	int[] starts = new int[count]
	int[] ends = new int[count]

	selectedUnits.eachWithIndex { unite, index ->
		starts[index] = unite.getDeb()
		ends[index] = unite.getFin()
	}

	// the third slot (targets) is always null
	return [starts, ends, null]
}
196 | 671 | mdecorde | |
/**
 * Lists every position covered by a unit, in increasing order.
 *
 * When the start of the unit is greater than its end (an incoherent unit), the two
 * bounds are swapped so that the result is still an increasing sequence.
 *
 * @param u the unit, read through getDeb() and getFin()
 * @return the positions from the smaller bound to the larger bound, both included
 */
static int[] toIntArray(Unite u) {
	int first = Math.min(u.getDeb(), u.getFin())
	int last = Math.max(u.getDeb(), u.getFin())
	return (first..last) as int[]
}
203 | 2144 | mdecorde | |
/**
 * Builds a short textual description of an element.
 *
 * <ul>
 * <li>Unite: "deb-fin, props"</li>
 * <li>Relation: "elt1=elt2 -> props", each member being described recursively</li>
 * <li>Schema: "content size=props"</li>
 * </ul>
 * The class of the element is compared exactly (no subclass matching).
 *
 * @param e the element to describe
 * @return the description, or null when e is neither a Unite, a Relation nor a Schema
 */
static String toString(Element e) {
	if (e.getClass() == Unite.class) {
		return sprintf("%d-%d, %s", e.getDeb(), e.getFin(), e.getProps().sort())
	} else if (e.getClass() == Relation.class) {
		return sprintf("%s=%s -> %s", toString(e.getElt1()), toString(e.getElt2()), e.getProps().sort())
	} else if (e.getClass() == Schema.class) {
		// the properties are a map: they must be formatted with %s, %d throws IllegalFormatConversionException
		return sprintf("%s=%s", e.getContenu().size(), e.getProps().sort())
	}
	return null
}
214 | 671 | mdecorde | |
/**
 * Builds a textual description of an element, including the words it covers.
 *
 * The words are fetched with CQI.cpos2Str() for the qualified name of wordProperty and the
 * positions given by toIntArray(), then joined with a space.
 * <ul>
 * <li>Unite: "words deb-fin, props"</li>
 * <li>Relation: "words1 elt1=words2 elt2 -> props"</li>
 * <li>Schema: "content size=props" (no words)</li>
 * </ul>
 * The class of the element is compared exactly (no subclass matching).
 *
 * @param CQI object providing cpos2Str(String, int[])
 * @param wordProperty the word property, read through getQualifiedName()
 * @param e the element to describe
 * @return the description, or null when e is neither a Unite, a Relation nor a Schema
 */
static String toString(def CQI, def wordProperty, Element e) {
	if (e.getClass() == Unite.class) {
		def form = StringUtils.join(CQI.cpos2Str(wordProperty.getQualifiedName(), toIntArray(e)), " ")
		return sprintf("%s %d-%d, %s", form, e.getDeb(), e.getFin(), e.getProps().sort())
	} else if (e.getClass() == Relation.class) {
		def form1 = StringUtils.join(CQI.cpos2Str(wordProperty.getQualifiedName(), toIntArray(e.getElt1())), " ")
		def form2 = StringUtils.join(CQI.cpos2Str(wordProperty.getQualifiedName(), toIntArray(e.getElt2())), " ")
		return sprintf("%s=%s -> %s", form1+" "+toString(e.getElt1()), form2+" "+toString(e.getElt2()), e.getProps().sort())
	} else if (e.getClass() == Schema.class) {
		// the properties are a map: they must be formatted with %s, %d throws IllegalFormatConversionException
		return sprintf("%s=%s", e.getContenu().size(), e.getProps().sort())
	}
	return null
}
229 | 671 | mdecorde | |
/**
 * Finds the elements of a corpus selected by a URSQL expression.
 *
 * The expression is split by getFilterParameters() and the four resulting values are
 * forwarded to the overload taking explicit criteria.
 *
 * @param debug verbosity level; the parsed parameters are printed when it is >= 2
 * @param analecCorpus the corpus to search
 * @param elemClazz the class of the elements to search
 * @param URSQL the URSQL expression
 * @return the result of the overload taking explicit criteria
 */
static def findAllInCorpus(def debug, def analecCorpus, Class elemClazz, String URSQL) {
	def params = getFilterParameters(URSQL)
	if (debug >= 2) {
		println "PARAMS=$params"
	}
	def (typeRegex, propName, eq, valueRegex) = params
	return findAllInCorpus(debug, analecCorpus, elemClazz, typeRegex, propName, eq, valueRegex)
}
235 | 671 | mdecorde | |
/**
 * Finds the elements of a corpus matching explicit type/property/value criteria.
 *
 * The candidates depend on elemClazz: all units for Unite, all relations for Relation,
 * all schemas for Schema, and units + relations + schemas when elemClazz is null.
 * For any other class no candidate collection is built (null is handed to filterElements()).
 *
 * @param debug verbosity level forwarded to filterElements()
 * @param analecCorpus the corpus to search
 * @param elemClazz the class of the elements to search, or null for every kind of element
 * @param typeRegex forwarded to filterElements()
 * @param propName forwarded to filterElements()
 * @param eq forwarded to filterElements()
 * @param valueRegex forwarded to filterElements()
 * @return the result of filterElements() on the candidates
 */
static def findAllInCorpus(def debug, Corpus analecCorpus, Class elemClazz, String typeRegex, String propName, boolean eq, String valueRegex) {
	def candidates = null

	if (elemClazz == null) {
		// no class given: search every kind of element
		candidates = []
		candidates.addAll(analecCorpus.getToutesUnites())
		candidates.addAll(analecCorpus.getToutesRelations())
		candidates.addAll(analecCorpus.getTousSchemas())
	} else if (elemClazz == Unite.class) {
		candidates = analecCorpus.getToutesUnites()
	} else if (elemClazz == Relation.class) {
		candidates = analecCorpus.getToutesRelations()
	} else if (elemClazz == Schema.class) {
		candidates = analecCorpus.getTousSchemas()
	}

	return filterElements(debug, candidates, typeRegex, propName, eq, valueRegex)
}
255 | 671 | mdecorde | |
/**
 * Keeps the elements whose number of underlying units lies in a size interval.
 *
 * @param elements the elements to filter, each read through getUnitesSousjacentes()
 * @param minimum_schema_size lower bound (inclusive); null or negative is replaced by 0
 * @param maximum_schema_size upper bound (inclusive); null or <= 0 is replaced by Integer.MAX_VALUE
 * @return the list of kept elements, in iteration order
 */
static def filterBySize(def elements, Integer minimum_schema_size, Integer maximum_schema_size) {
	if (maximum_schema_size == null || maximum_schema_size <= 0) {
		maximum_schema_size = Integer.MAX_VALUE
	}
	if (minimum_schema_size == null || minimum_schema_size < 0) {
		minimum_schema_size = 0
	}

	def kept = []
	for (Element candidate : elements) {
		Unite[] underlyingUnits = candidate.getUnitesSousjacentes()
		int count = underlyingUnits.length
		boolean tooSmall = count < minimum_schema_size
		boolean tooBig = count > maximum_schema_size
		if (!tooSmall && !tooBig) {
			kept << candidate
		}
	}
	return kept
}
270 | 671 | mdecorde | |
/**
 * Groups units by CQP match.
 *
 * The units are first sorted by start then end position, so that units and matches can be
 * walked together in a single pass. A unit located entirely before the current match is
 * skipped; when the current unit starts after the end of the current match, the next match
 * (and a new empty group) becomes current.
 *
 * @param debug verbosity level; a summary is printed when >= 2, the details of the walk when >= 3
 * @param allUnites the units to group
 * @param matches the matches, read through getStart() and getEnd()
 * @param strict_inclusion when true a unit is kept only if it lies entirely inside the match,
 *        when false overlapping the match is enough
 * @return a LinkedHashMap from match index to the list of units selected for this match;
 *         it always holds a group for index 0, and may hold a trailing empty group for the
 *         index following the last visited match
 */
static def groupByMatch(def debug, def allUnites, def matches, boolean strict_inclusion) {
	if (debug >= 2) println "group "+allUnites.size()+" units with "+matches.size()+" strict=$strict_inclusion"

	allUnites = allUnites.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

	def unitsSize = allUnites.size()
	def iCurrentUnit = 0
	def selectedUnits = []

	def matchesSize = matches.size()
	def iCurrentMatch = 0

	def selectedUnitsPerMatch = new LinkedHashMap()
	selectedUnitsPerMatch[iCurrentMatch] = selectedUnits

	while (iCurrentMatch < matchesSize && iCurrentUnit < unitsSize) {
		if (debug >= 3) println "** M $iCurrentMatch < $matchesSize && U $iCurrentUnit < $unitsSize"

		Unite unit = allUnites[iCurrentUnit]
		Match match = matches[iCurrentMatch]
		if (debug >= 3) println ""+unit.getDeb()+"->"+unit.getFin()+" "+match.getStart()+"->"+match.getEnd()

		if (unit.getFin() < match.getStart()) {
			// the unit ends before the match starts: it cannot belong to this or any later match
			if (debug >= 3) println "next unit"

			iCurrentUnit++
		} else if (unit.getDeb() > match.getEnd()) {
			// the unit starts after the match: open the group of the next match
			if (debug >= 3) println "next match"

			iCurrentMatch++
			selectedUnits = []
			selectedUnitsPerMatch[iCurrentMatch] = selectedUnits
		} else {
			// the unit overlaps the match
			if (debug >= 3) println "iCurrentUnit=$iCurrentUnit iCurrentMatch=$iCurrentMatch"
			if (strict_inclusion) {
				if (debug >= 3) println "m.start ${match.getStart()} <= u.deb ${unit.getDeb()} && u.fin ${unit.getFin()} <= m.end ${match.getEnd()}"
				if (match.getStart() <= unit.getDeb() && unit.getFin() <= match.getEnd()) {
					selectedUnits << unit
				}
			} else {
				selectedUnits << unit
			}

			iCurrentUnit++
		}
	}
	return selectedUnitsPerMatch
}
329 | 671 | mdecorde | |
/**
 * Selects the units located in the matches, optionally keeping one unit per match.
 *
 * The units are grouped with groupByMatch(). With position 0 every grouped unit is returned.
 * Otherwise one unit is picked in each group, after sorting the group by start then end:
 * position N > 0 picks the N-th unit, position N < 0 picks the N-th unit counted from the end.
 * A group too small to hold the requested position contributes nothing.
 *
 * @param debug verbosity level; the picked units are printed when it is >= 3
 * @param allUnites the units to filter
 * @param matches the matches delimiting the units
 * @param strict_inclusion forwarded to groupByMatch()
 * @param position 0 = all units, N > 0 = N-th unit of each match, N < 0 = N-th unit from the end of each match
 * @return the list of selected units
 */
static def filterUniteByInclusion(def debug, def allUnites, def matches, boolean strict_inclusion, int position) {

	def selectedUnitsPerMatch = groupByMatch(debug, allUnites, matches, strict_inclusion)

	def selectedUnits = []
	if (position == 0) {
		for (def m : selectedUnitsPerMatch.keySet()) selectedUnits.addAll(selectedUnitsPerMatch[m])
		return selectedUnits
	}

	// positive positions are 1-based, negative positions are already usable as indexes from the end
	int index = position > 0 ? position - 1 : position

	for (def m : selectedUnitsPerMatch.keySet()) {
		def units = selectedUnitsPerMatch[m]
		int size = units.size()

		// a negative index larger than the group would be out of bounds, as would a too large positive one
		boolean inRange = index >= 0 ? index < size : -index <= size
		if (!inRange) continue

		units = units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
		selectedUnits << units[index]
		if (debug >= 3) println "dist select: "+units[index].getDeb()
	}

	return selectedUnits
}
354 | 671 | mdecorde | |
/**
 * Collects the underlying units of elements that are selected by a URSQL expression.
 *
 * The expression is split by getFilterParameters() and the four resulting values are
 * forwarded to the overload taking explicit criteria.
 *
 * @param debug verbosity level forwarded to the overload
 * @param elements the elements whose underlying units are searched
 * @param URSQL the URSQL expression
 * @return the result of the overload taking explicit criteria
 */
static def findAllUnitesInElements(def debug, def elements, String URSQL) {
	def (typeRegex, propName, eq, valueRegex) = getFilterParameters(URSQL)
	return findAllUnitesInElements(debug, elements, typeRegex, propName, eq, valueRegex)
}
359 | 671 | mdecorde | |
/**
 * Collects, in a single list, the underlying units of elements that pass filterElements().
 *
 * @param debug verbosity level forwarded to filterElements()
 * @param elements the elements, each read through getUnitesSousjacentes()
 * @param typeRegex forwarded to filterElements()
 * @param propName forwarded to filterElements()
 * @param eq forwarded to filterElements()
 * @param valueRegex forwarded to filterElements()
 * @return the list of kept units, element after element
 */
static def findAllUnitesInElements(def debug, def elements, String typeRegex, String propName, boolean eq, String valueRegex) {
	def collectedUnits = []

	for (Element owner : elements) {
		def keptUnits = filterElements(debug, owner.getUnitesSousjacentes(), typeRegex, propName, eq, valueRegex)
		collectedUnits.addAll(keptUnits)
	}

	return collectedUnits
}
369 | 671 | mdecorde | |
/**
 * Groups all the underlying units of the elements, without any selection.
 *
 * Delegates to the overload taking explicit criteria with empty type, property and value
 * criteria, which disables both filtering steps of filterElements().
 *
 * @param debug verbosity level forwarded to the overload
 * @param elements the elements whose underlying units are grouped
 * @return a map from each element to its units
 */
static def groupAllUnitesInElements(def debug, def elements) {
	// the explicit-criteria overload takes (type, prop, eq, value): the eq flag must be given,
	// there is no overload taking three strings
	return groupAllUnitesInElements(debug, elements, "", "", true, "")
}
379 | 671 | mdecorde | |
/**
 * Groups the underlying units of the elements, keeping only the units selected by a URSQL expression.
 *
 * The expression is split by getFilterParameters() and the four resulting values are
 * forwarded to the overload taking explicit criteria.
 *
 * @param debug verbosity level forwarded to the overload
 * @param elements the elements whose underlying units are grouped
 * @param URSQL the URSQL expression selecting the units
 * @return the result of the overload taking explicit criteria
 */
static def groupAllUnitesInElements(def debug, def elements, String URSQL) {
	def (typeRegex, propName, eq, valueRegex) = getFilterParameters(URSQL)
	return groupAllUnitesInElements(debug, elements, typeRegex, propName, eq, valueRegex)
}
391 | 671 | mdecorde | |
/**
 * Groups the underlying units of the elements, keeping only the units that pass filterElements().
 *
 * @param debug verbosity level forwarded to filterElements()
 * @param elements the elements, each read through getUnitesSousjacentes()
 * @param typeRegex forwarded to filterElements()
 * @param propName forwarded to filterElements()
 * @param eq forwarded to filterElements()
 * @param valueRegex forwarded to filterElements()
 * @return a map from each element to the result of filterElements() on its underlying units
 */
static def groupAllUnitesInElements(def debug, def elements, String typeRegex, String propName, boolean eq, String valueRegex) {
	def unitsPerElement = [:]

	for (Element owner : elements) {
		def keptUnits = filterElements(debug, owner.getUnitesSousjacentes(), typeRegex, propName, eq, valueRegex)
		unitsPerElement.put(owner, keptUnits)
	}

	return unitsPerElement
}
401 | 671 | mdecorde | |
/**
 * Splits a URSQL expression into its type, property, operator and value parts.
 *
 * Recognised forms:
 * <ul>
 * <li>TYPE@PROP=VALUE and TYPE@PROP!=VALUE</li>
 * <li>TYPE@PROP</li>
 * <li>TYPE=VALUE and TYPE!=VALUE (not well formed: the property stays empty)</li>
 * <li>TYPE</li>
 * </ul>
 * The operator is the first '=' of the expression; it is a "different" operator when this
 * '=' is immediately preceded by '!'. When '@' appears after the operator, the expression
 * is read as TYPE@PROP.
 *
 * @param URSQL the expression; null is read as the empty expression
 * @return the list [type, prop, eq, value] where eq is false only for the "!=" operator
 *         and missing parts are empty strings
 */
static def getFilterParameters(String URSQL) {
	// a missing expression means "no criterion" rather than a NullPointerException
	if (URSQL == null) URSQL = ""

	String type = ""
	String prop = ""
	String value = ""

	int atidx = URSQL.indexOf("@")
	int equal_end_idx = URSQL.indexOf("=") // position of the '=' character
	int equal_start_idx = equal_end_idx // position where the operator starts ('!' for "!=")
	int differentidx = URSQL.indexOf("!=")
	boolean eq = differentidx < 0 || differentidx != equal_end_idx - 1
	if (!eq) {
		equal_start_idx--
	}

	if (atidx >= 0 && equal_start_idx >= 0 && atidx < equal_start_idx) { // TYPE@PROP=VALUE
		type = URSQL.substring(0, atidx)
		prop = URSQL.substring(atidx + 1, equal_start_idx)
		value = URSQL.substring(equal_end_idx + 1)
	} else if (atidx >= 0) { // TYPE@PROP
		type = URSQL.substring(0, atidx)
		prop = URSQL.substring(atidx + 1)
	} else if (equal_start_idx >= 0) { // TYPE=VALUE -> not well formed
		type = URSQL.substring(0, equal_start_idx)
		value = URSQL.substring(equal_end_idx + 1)
	} else { // TYPE
		type = URSQL
	}

	return [type, prop, eq, value]
}
434 | 671 | mdecorde | |
/**
 * Filters elements with a URSQL expression.
 *
 * The expression is split by getFilterParameters() and the four resulting values are
 * forwarded to the overload taking explicit criteria.
 *
 * @param debug verbosity level forwarded to the overload
 * @param allElements the elements to filter
 * @param URSQL the URSQL expression
 * @return the result of the overload taking explicit criteria
 */
static def filterElements(def debug, def allElements, String URSQL) {
	def (typeRegex, propName, eq, valueRegex) = getFilterParameters(URSQL)
	return filterElements(debug, allElements, typeRegex, propName, eq, valueRegex)
}
439 | 671 | mdecorde | |
/**
 * Filters elements by type, then by property (and optionally by property value).
 *
 * <ol>
 * <li>type step, skipped when typeRegex is null or empty: keeps the elements whose type fully matches typeRegex;</li>
 * <li>property step, skipped when propName is null or empty:
 *   <ul>
 *   <li>with a non-empty valueRegex, keeps the elements whose propName value fully matches valueRegex
 *       when eq is true, and the elements whose value does not match when eq is false;</li>
 *   <li>without valueRegex, keeps the elements whose properties contain the propName key.</li>
 *   </ul>
 * </li>
 * </ol>
 * When both steps are skipped the given collection is returned as it is.
 *
 * @param debug verbosity level; the size of the result of each step is printed when it is >= 3
 * @param allElements the elements to filter
 * @param typeRegex regular expression on the element type, or null/empty for no type selection
 * @param propName name of the property to test, or null/empty for no property selection
 * @param eq true to keep matching values, false to keep non matching values
 * @param valueRegex regular expression on the property value, or null/empty to only test the property presence
 * @return the remaining elements
 */
static def filterElements(def debug, def allElements, String typeRegex, String propName, boolean eq, String valueRegex) {
	if (debug >= 3) println "filtering "+allElements.size()+" elements with typeRegex='$typeRegex' propName='$propName' and valueRegex='$valueRegex'"

	boolean filterOnType = typeRegex != null && typeRegex.length() > 0
	if (filterOnType) {
		def typePattern = /$typeRegex/
		def keptByType = []
		for (Element candidate : allElements) {
			if (candidate.getType() ==~ typePattern) keptByType << candidate
		}
		allElements = keptByType
	}
	if (debug >= 3) println " type step result: "+allElements.size()

	boolean filterOnProperty = propName != null && propName.length() > 0
	if (filterOnProperty) {
		def keptByProperty = []
		boolean filterOnValue = valueRegex != null && valueRegex.length() > 0
		if (filterOnValue) { // property and value selection
			def valuePattern = /$valueRegex/
			for (Element candidate : allElements) {
				boolean valueMatches = candidate.getProp(propName) ==~ valuePattern
				// keep matching values for "=", non matching values for "!="
				if (valueMatches == eq) keptByProperty << candidate
			}
		} else { // property presence only
			for (Element candidate : allElements) {
				if (candidate.getProps().containsKey(propName)) keptByProperty << candidate
			}
		}
		allElements = keptByProperty
	}
	if (debug >= 3) println " prop&value step result: "+allElements.size()

	return allElements
}
480 | 671 | mdecorde | |
/**
 * Builds the CQL query of a set of units with the default options of the four-argument
 * overload: onePosition = false and limitNumberOfUnit = true.
 *
 * @param name name used in the messages printed by the four-argument overload
 * @param unites the units
 * @return the query built by the four-argument overload
 */
static def getCQL(String name, def unites) {
	boolean onePosition = false
	boolean limitNumberOfUnit = true
	return getCQL(name, unites, onePosition, limitNumberOfUnit)
}
484 | 671 | mdecorde | |
/**
 * Builds a CQL query targeting the positions of a set of units.
 *
 * The query has the form LEFT::RIGHT. LEFT declares, once per unit size, a labelled pattern
 * of that many tokens ("a:[]", "b:[][]", "c:[][][]", then "x:[][]{size-1}"); RIGHT lists, for
 * each unit, "label=start position". Alternatives are separated with "|". The label is the
 * letter of the unit size; sizes larger than the number of available letters are clamped.
 *
 * The units are sorted by start position (the given collection is sorted in place). Units
 * with a negative or null size are reported and skipped. When adding a unit would make the
 * query reach the maximum query size, a message is printed and the remaining units are ignored.
 *
 * @param name name used in the "too many elements" message
 * @param unites the units, read through getDeb(), getFin() and getProps()
 * @param onePosition true to target only one token per unit (every unit is then handled as a size 1 unit)
 * @param limitNumberOfUnit not read by this method
 * @return the CQL query ("::" when no unit is given)
 */
static def getCQL(String name, def unites, boolean onePosition, boolean limitNumberOfUnit) {
	def letters = "abcdefghijklmnopqrstu"//vwxyz0123456789"
	def MAXCQLQUERYSIZE = 1200 // 1150 // 1200 in fact

	int n = 0

	String totalleftquery = ""
	String totalrightquery = ""
	unites.sort() { it.getDeb() }
	def declaredsizes = []
	for (Unite unite : unites) {
		int size = unite.getFin() - unite.getDeb() + 1
		// a size of 0 would make the letter lookup below fail, unless onePosition forces the size to 1
		if (size < 0 || (size == 0 && !onePosition)) {
			println sprintf("** Warning: incoherent unit %s [%d, %d], size = "+size, unite.getProps(),unite.getDeb(), unite.getFin())
			continue
		}
		if (onePosition) size = 1 // hack only the 1st position is needed for the Progression
		if (size > letters.length()) size = letters.length()-1
		String letter = ""+letters.charAt(size-1)
		String rightquery = letter+"="+unite.getDeb()

		// the pattern of a given size is declared only once in the left part
		String leftquery = ""
		if (!declaredsizes.contains(size)) {
			declaredsizes << size

			if (size == 1)
				leftquery = letter+":[]"
			else if (size == 2)
				leftquery = letter+":[][]"
			else if (size == 3)
				leftquery = letter+":[][][]"
			else
				leftquery = letter+":[][]{"+(size-1)+"}"
		}

		if ((totalleftquery.length() + totalrightquery.length() + 2
				+ leftquery.length() + rightquery.length()) >= MAXCQLQUERYSIZE) {
			System.out.println("** $name : trop d'éléments pour la requête. Seuls les "+n+" premiers éléments sur ${unites.size()} seront affichés dans le graphique de progression.")
			break
		}

		if (n > 0) {
			if (leftquery.length() > 0) totalleftquery += "|"
			totalrightquery += "|"
		}
		if (leftquery.length() > 0) totalleftquery += leftquery
		totalrightquery += rightquery

		n += 1
	}

	String query = totalleftquery+"::"+totalrightquery
	return query
}