Revision 2144 tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy

AnalecUtils.groovy (revision 2144)
53 53

  
54 54
static def selectSchemas(def debug, Corpus analecCorpus, String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size) {
55 55
	if (maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE;
56
	if (minimum_schema_size < 0) minimum_schema_size = 0;
56 57
	def allSchemas = []
57
	if (schema_ursql != null && schema_ursql.length() > 0 || minimum_schema_size > 1 || maximum_schema_size >= 1) {
58
	
59
	if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql)
60
	else allSchemas = analecCorpus.getTousSchemas()
58 61

  
59
		if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql)
60
		else allSchemas = analecCorpus.getTousSchemas()
61

  
62
		if (debug >= 2) println "allSchemas=${allSchemas.size()}"
63
		if (minimum_schema_size > 1 || maximum_schema_size >= 1) allSchemas = AnalecUtils.filterBySize(allSchemas, minimum_schema_size, maximum_schema_size);
64
	} else {
65
		allSchemas = analecCorpus.getTousSchemas()
66
	}
62
	if (debug >= 2) println "allSchemas=${allSchemas.size()}"
63
	allSchemas = AnalecUtils.filterBySize(allSchemas, minimum_schema_size, maximum_schema_size);
64
	
67 65
	return allSchemas
68 66
}
69 67

  
70 68
static def selectSchemasInCorpus(def debug, Corpus analecCorpus, org.txm.searchengine.cqp.corpus.CQPCorpus corpus,
71
		String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size) {
69
	String schema_ursql, Integer minimum_schema_size, Integer maximum_schema_size, boolean strictInclusion) {
72 70
		
73
	def allSchemas = AnalecUtils.selectSchemas(debug, analecCorpus, schema_ursql, minimum_schema_size, maximum_schema_size);
71
	if (maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE;
72
	if (minimum_schema_size < 0) minimum_schema_size = 0;
73
		
74
	def allSchemas = []
75
	if (schema_ursql != null && schema_ursql.length() > 0) allSchemas = AnalecUtils.findAllInCorpus(debug, analecCorpus, Schema.class, schema_ursql)
76
	else allSchemas = analecCorpus.getTousSchemas()
74 77
	
75 78
	def selectedSchemas = []
76 79
	for (Schema schema : allSchemas) {
77
		def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, schema.getUnitesSousjacentes(), corpus.getMatches(), true, 0)
78
		if (selectedUnits.size() > 0 ) {
80
		def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, schema.getUnitesSousjacentes(), corpus.getMatches(), strictInclusion, 0)
81
		
82
		if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size ) {
79 83
			selectedSchemas << schema
80 84
		}
81 85
	}
......
109 113
		else allSchema = analecCorpus.getTousSchemas()
110 114
		if (debug >= 2) println "allSchema=${allSchema.size()}"
111 115

  
112
		if (minimum_schema_size > 1) allSchema = AnalecUtils.filterBySize(allSchema, minimum_schema_size, maximum_schema_size);
116
		allSchema = AnalecUtils.filterBySize(allSchema, minimum_schema_size, maximum_schema_size);
113 117
		if (debug >= 2) println "allSchema=${allSchema.size()}"
114 118

  
115 119
		groupedUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchema, unit_ursql)
......
121 125
	}
122 126
	if (debug >= 2) println "groupedUnits=${groupedUnits.size()}"
123 127

  
128
	// limit units to corpus or cql_limit matches
124 129
	def matches = null
125 130
	if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) {
126 131
		Subcorpus limitssubcorpus = corpus.createSubcorpus(cql_limit, corpus.getID().toUpperCase())
......
151 156
 */
152 157
static def filterUniteByInclusionInSchema(def debug, def groups, Integer distance) {
153 158
	if (distance == 0) return groups;
159
	
154 160
	distance = distance-1;
155 161
	def newGroups = [:]
156 162
	for (def k : groups.keySet()) {
......
174 180
	return [starts, ends, null]
175 181
}
176 182

  
183
/**
 * Expands a unit into the array of every position it covers, bounds included.
 *
 * The bounds are read from u.getDeb() and u.getFin(). If they are inverted
 * (getDeb() > getFin(), which the original code flagged as an error case),
 * they are swapped so the result is always in ascending order.
 *
 * @param u the unit whose getDeb()/getFin() bounds delimit the range
 * @return an int[] holding max-min+1 consecutive values, from the smaller bound to the larger one
 */
static int[] toIntArray(Unite u) {
	// normalize the bounds once instead of duplicating the range expression per branch
	int first = Math.min(u.getDeb(), u.getFin())
	int last = Math.max(u.getDeb(), u.getFin())

	// Groovy's "as" coercion unboxes the range into a correctly sized int[];
	// List.toArray(T[]) only accepts reference-type arrays, and the inclusive
	// range holds last-first+1 elements, not last-first
	return (first..last) as int[]
}
189

  
177 190
static String toString(Element e) {
178 191
	Schema r = null;
179 192

  
......
185 198
		return sprintf("%s=%d", e.getContenu().size(), e.getProps().sort())
186 199
}
187 200

  
188
/**
 * Expands a unit into the array of every position it covers, bounds included.
 *
 * The bounds are read from u.getDeb() and u.getFin(). If they are inverted
 * (getDeb() > getFin(), which the original code flagged as an error case),
 * they are swapped so the result is always in ascending order.
 *
 * @param u the unit whose getDeb()/getFin() bounds delimit the range
 * @return an int[] holding max-min+1 consecutive values, from the smaller bound to the larger one
 */
static int[] toIntArray(Unite u) {
	// normalize the bounds once instead of duplicating the range expression per branch
	int first = Math.min(u.getDeb(), u.getFin())
	int last = Math.max(u.getDeb(), u.getFin())

	// Groovy's "as" coercion unboxes the range into a correctly sized int[];
	// List.toArray(T[]) only accepts reference-type arrays, and the inclusive
	// range holds last-first+1 elements, not last-first
	return (first..last) as int[]
}
194

  
195 201
static String toString(def CQI, def wordProperty, Element e) {
196 202
	Schema r = null;
197 203

  
......
230 236
		allElements.addAll(analecCorpus.getTousSchemas())
231 237
	}
232 238

  
233

  
234 239
	return filterElements(debug, allElements, typeRegex, propName, valueRegex);
235 240
}
236 241

  
237
static def filterBySize(def elements, Integer minSize, Integer maximum_schema_size) {
242
static def filterBySize(def elements, Integer minimum_schema_size, Integer maximum_schema_size) {
238 243
	if (maximum_schema_size == null || maximum_schema_size <= 0) maximum_schema_size = Integer.MAX_VALUE;
239
	if (minSize == null || minSize < 0) minSize = 0;
244
	if (minimum_schema_size == null || minimum_schema_size < 0) minimum_schema_size = 0;
240 245

  
241 246
	def filteredElements = []
242 247
	for (Element e : elements) {
243 248
		Unite[] units = e.getUnitesSousjacentes();
244 249
		int size = units.length;
245
		if (size < minSize) continue;
250
		if (size < minimum_schema_size) continue;
246 251
		if (size > maximum_schema_size) continue;
247 252
		filteredElements << e;
248 253
	}

Also available in: Unified diff