Révision 321
tmp/org.txm.groovy.core/src/groovy/tests/advancedPartition.groovy (revision 321) | ||
---|---|---|
1 |
package tests |
|
2 |
|
|
3 |
import org.txm.rcpapplication.views.* |
|
4 |
import org.txm.searchengine.cqp.corpus.* |
|
5 |
|
|
6 |
|
|
7 |
/* |
|
8 |
import java.awt.Dimension |
|
9 |
import java.util.UUID; |
|
10 |
|
|
11 |
import javax.swing.JFrame; |
|
12 |
import javax.swing.JOptionPane; |
|
13 |
|
|
14 |
import org.txm.Toolbox |
|
15 |
import org.txm.searchengine.cqp.MemCqiClient |
|
16 |
import org.txm.searchengine.cqp.MemCqiServer |
|
17 |
import org.txm.searchengine.cqp.corpus.* |
|
18 |
import org.txm.functions.ReferencePattern |
|
19 |
import org.txm.rcpapplication.views.CorporaView |
|
20 |
import org.txm.searchengine.cqp.corpus.query.Query |
|
21 |
*/ |
|
22 |
|
|
23 |
|
|
24 |
/** |
|
25 |
* Create partition with advanced CQL queries and autoname the parts. |
|
26 |
* Can create partitions by defining a multi-level structural units hierarchy or by defining several properties values for one structural unit. |
|
27 |
* Can define some structural units to remove from the resulting parts subcorpus. |
|
28 |
* |
|
29 |
*/ |
|
30 |
|
|
31 |
// TODO : Log |
|
32 |
println "******************************** Starting ********************************************"; |
|
33 |
|
|
34 |
|
|
35 |
// *************************** User parameters ************************************************************ |
|
36 |
|
|
37 |
|
|
38 |
// Test 1 : partition en excluant des sections |
|
39 |
def CORPUS_NAME = "LIVRETOPERA2" // The name of the corpus to partition |
|
40 |
|
|
41 |
def SUBCORPUS_NAME = null // The name of the subcorpus to partition |
|
42 |
//def SUBCORPUS_NAME = "TEST" // The name of the subcorpus to partition if needed |
|
43 |
// FIXME: subcorpus management is bugged, when trying to create a CA on a Partition created on a subcorpus, it doesn't work because of the getLexicon() method which throws a CqiCqpErrorErrorGeneral exception |
|
44 |
|
|
45 |
PartitionQueriesGenerator.STRUCTURAL_UNITS = ['sp']; // Applying the partition on these structural units |
|
46 |
PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['who']; // Applying the partition on these properties of structural units defined above |
|
47 |
|
|
48 |
PartitionQueriesGenerator.STRUCTURAL_UNITS_TO_IGNORE = ['speaker']; // These structural units will be removed from the partition. NOTE : If doing that you need to define an EXPAND_TARGET |
|
49 |
// if you want some subcorpus parts that will manage sequential positions queries |
|
50 |
|
|
51 |
PartitionQueriesGenerator.EXPAND_TARGET = 'lg'; // Expand the results to this structural unit parent target. If some structural units to ignore are defined |
|
52 |
// and the expand target is upper on the hierarchy than them, the structural units to ignore WON'T be ignored |
|
53 |
|
|
54 |
//PartitionQueriesGenerator.PARTITION_NAME = 'gugu'; // The partition name. If empty or not defined, the partition will be autonamed regarding of the structural units and properties |
|
55 |
|
|
56 |
|
|
57 |
|
|
58 |
|
|
59 |
// Test 2 : partition sur arborescence (multi-niveau sur 2 niveaux) |
|
60 |
//def CORPUS_NAME = "LIVRETOPERA10TEXTES" |
|
61 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'div1']; |
|
62 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['id', 'name']; |
|
63 |
|
|
64 |
|
|
65 |
// Test 3 : partition sur arborescence (multi-niveau sur 3 niveaux) |
|
66 |
//def CORPUS_NAME = "LIVRETOPERA10TEXTES" |
|
67 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'div1', 'div2']; |
|
68 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['id', 'name', 'name']; |
|
69 |
|
|
70 |
|
|
71 |
// Test 4 : partition sur arborescence (multi-niveau sur 4 niveaux) |
|
72 |
// Ajouter le 'n' permet par exemple ici de trier les parties par ordre de scène car sinon le tri est problématique, ex : "SCENE II" passe avant "SCENE PREMIERE" |
|
73 |
//def CORPUS_NAME = "LIVRETOPERA2" |
|
74 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'div1', 'div2', 'div2']; |
|
75 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['id', 'name', 'n', 'name']; |
|
76 |
|
|
77 |
|
|
78 |
// Test 5 : partitions croisées (sur plusieurs propriétés d'une même structure) |
|
79 |
//def CORPUS_NAME = "DISCOURS" |
|
80 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'text']; |
|
81 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['loc', 'type']; |
|
82 |
|
|
83 |
|
|
84 |
|
|
85 |
// Tests |
|
86 |
//def CORPUS_NAME = "LIVRETOPERA10TEXTES" |
|
87 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['div1', 'sp']; |
|
88 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['name', 'who']; |
|
89 |
|
|
90 |
// Tests |
|
91 |
//def CORPUS_NAME = "LIVRETOPERA10TEXTES" |
|
92 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['div1']; |
|
93 |
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['n']; |
|
94 |
//PartitionQueriesGenerator.PART_NAMES_PREFIX = 'act_'; |
|
95 |
|
|
96 |
|
|
97 |
|
|
98 |
|
|
99 |
// *************************** Debug parameters ************************************************************ |
|
100 |
|
|
101 |
|
|
102 |
PartitionQueriesGenerator.DEBUG = 0; // If DEBUG != 0 then partition is not created, |
|
103 |
// script only outputs the created queries and part names strings in console |
|
104 |
|
|
105 |
|
|
106 |
|
|
107 |
// *************************** End of parameters ************************************************************ |
|
108 |
|
|
109 |
|
|
110 |
|
|
111 |
|
|
112 |
|
|
113 |
|
|
114 |
|
|
115 |
// Running |
|
116 |
// Build the partition from the user parameters set above.
// createPartition() returns null when the parameters are inconsistent.
def partition = PartitionQueriesGenerator.createPartition(CORPUS_NAME, SUBCORPUS_NAME)

// Refresh the RCP corpora view and unfold the parent of the new partition.
// The work is handed to monitor.syncExec() as a Runnable.
if (partition != null) {
	def refreshCorporaView = { ->
		CorporaView.refresh()
		CorporaView.expand(partition.getParent())
	} as Runnable

	monitor.syncExec(refreshCorporaView)
}
|
128 |
|
|
129 |
|
|
130 |
|
|
131 |
|
|
132 |
|
|
133 |
/**
 * Creates a list of CQL queries and part names from the user-defined lists of structural units,
 * structural unit properties and structural units to ignore, and from the expand target.
 *
 * All the configuration and the results are held in static fields: set the parameters, then call
 * {@link #createPartition(String, String)}. The generated queries and part names stay readable in
 * {@link #queries} and {@link #partNames} after the call.
 *
 * @author s
 */
public class PartitionQueriesGenerator {

	/** If DEBUG != 0, the queries are printed but the partition is not created. */
	public static int DEBUG = 0;

	/** Name of the partition. If empty or null, a name is generated from the units and properties. */
	public static String PARTITION_NAME = '';

	/** Structural units on which the partition is built, ex: ['text', 'div1']. */
	public static String[] STRUCTURAL_UNITS = [];

	/** Property to use for each structural unit (same length and order), ex: ['id', 'name']. */
	public static String[] STRUCTURAL_UNITS_PROPERTIES = [];

	/** Structural units to exclude from the parts, ex. CQL: !speaker. */
	public static String[] STRUCTURAL_UNITS_TO_IGNORE = [];

	/** Prefix prepended to every part name. */
	public static String PART_NAMES_PREFIX = '';

	/**
	 * Expand target: widens the matches up to the given parent structure.
	 * NOTE: expanding conflicts with the units to ignore. If the target is higher in the hierarchy
	 * than the ignored units, they are put back into the CWB results and so are not ignored.
	 */
	public static String EXPAND_TARGET = null;

	/** Generated CQL queries, one per part. */
	public static ArrayList<String> queries = new ArrayList<String>();

	/** Generated part names, parallel to {@link #queries}. */
	public static ArrayList<String> partNames = new ArrayList<String>();

	/** Last auto-generated partition name, used to tell it apart from a user-defined one. */
	private static String lastAutoName = null;


	/**
	 * Initializes the generator, builds the queries and part names and creates the partition.
	 *
	 * @param corpusName the name of the corpus to partition
	 * @param subcorpusName the name of the subcorpus of that corpus to partition, or null to use the corpus itself
	 * @return the created partition, or null if the parameters are inconsistent, the corpus or
	 *         subcorpus is not found, DEBUG != 0, or the queries and part names counts differ
	 */
	public static Partition createPartition(String corpusName, String subcorpusName) {

		if (STRUCTURAL_UNITS == null || STRUCTURAL_UNITS_PROPERTIES == null
				|| STRUCTURAL_UNITS.size() == 0
				|| STRUCTURAL_UNITS.size() != STRUCTURAL_UNITS_PROPERTIES.size()) {
			// TODO : Log
			println 'Structural units count or structural units properties count error.';
			return null;
		}

		// The results live in static fields: drop what a previous call left behind,
		// otherwise the new queries would be appended to the old ones.
		queries.clear();
		partNames.clear();
		// Forget a name that was auto-generated by a previous call so that it is rebuilt
		// for the current parameters; a user-defined name is left untouched.
		if (lastAutoName != null && lastAutoName == PARTITION_NAME) {
			PARTITION_NAME = '';
		}

		// TODO : Log
		println '**************************************************************************************************************'
		println 'Creating the queries on corpus "' + corpusName + '"';

		Corpus corpus = CorpusManager.getCorpusManager().getCorpus(corpusName);
		if (corpus == null) {
			println 'Corpus "' + corpusName + '" not found.';
			return null;
		}

		// Subcorpora
		if (subcorpusName != null) {
			corpus = corpus.getSubcorpusByName(subcorpusName);
			if (corpus == null) {
				println 'Subcorpus "' + subcorpusName + '" not found in corpus "' + corpusName + '".';
				return null;
			}
		}

		// Recursing through the corpus and subcorpus
		process(corpus, 0, '', '');

		// Finalizing the queries
		finalizeQueries();

		// TODO : Debug
		// Displaying the partition name
		println '';
		println 'PARTITION_NAME: ' + PARTITION_NAME;

		// Displaying the queries
		println 'Queries (count = ' + queries.size() + '):';
		for (query in queries) {
			println query;
		}
		// Displaying the part names
		println 'Partnames (count = ' + partNames.size() + '):';
		for (partName in partNames) {
			print partName + ' / ';
		}

		// TODO : Log
		println 'Queries created.';

		// Creating the partition
		if (DEBUG == 0 && queries.size() == partNames.size()) {
			return corpus.createPartition(PARTITION_NAME, queries, partNames);
		}

		// Debug mode or inconsistent results: nothing is created
		return null;
	}


	/**
	 * Recurses through the structural units and their properties and creates the queries and the part names.
	 * One query / part name pair is produced for each combination of property values found along the hierarchy.
	 *
	 * @param corpus the corpus or subcorpus
	 * @param index the index for recursion (position in STRUCTURAL_UNITS)
	 * @param tmpQuery the temporary query for creating subcorpus part
	 * @param tmpPartName the temporary part name of the subcorpus part
	 */
	protected static void process(Corpus corpus, int index, String tmpQuery, String tmpPartName) {

		// End of array: the accumulated query and name describe one complete part
		if (index >= STRUCTURAL_UNITS.size()) {
			queries.add(tmpQuery);
			partNames.add(PART_NAMES_PREFIX + tmpPartName);
			return;
		}

		StructuralUnit su = corpus.getStructuralUnit(STRUCTURAL_UNITS[index]);
		if (su == null) {
			throw new IllegalArgumentException('Structural unit "' + STRUCTURAL_UNITS[index] + '" not found in corpus "' + corpus.getName() + '"');
		}
		StructuralUnitProperty sup = su.getProperty(STRUCTURAL_UNITS_PROPERTIES[index]);
		if (sup == null) {
			throw new IllegalArgumentException('Property "' + STRUCTURAL_UNITS_PROPERTIES[index] + '" not found in structural unit "' + STRUCTURAL_UNITS[index] + '"');
		}

		// TODO : Log
		println ''
		if (index == 0) {
			println 'Processing Structural Unit Property "' + sup.getFullName() + '" on mother corpus "' + corpus.getName() + '"';
		}
		else {
			println 'Processing Structural Unit Property "' + sup.getFullName() + '" on subcorpus part "' + tmpPartName + '"';
		}
		println ''

		// Separators are only needed once a first condition / name segment exists
		String and = '';
		String underscore = '';
		if (tmpQuery != '') {
			underscore = '_';
			and = ' & ';
		}

		// Creating the queries parts for each structural units properties values
		// TODO: report to Matthieu: passing the subcorpus again should not be needed since sup was
		// already obtained from it; sup.getOrderedValues() without argument and getValues() are reported as bugged.
		for (supValue in sup.getOrderedValues(corpus)) {

			// TODO : Log
			println ''
			println 'Value "' + supValue + '"';
			println ''

			// Getting the subcorpus linked to the structural unit property value
			Subcorpus tmpSubcorpus = corpus.createSubcorpusWithQueryString(su, sup, supValue, "tmp" + UUID.randomUUID());

			try {
				process(tmpSubcorpus, index + 1, (tmpQuery + and + '_.' + sup.getFullName() + '="' + supValue + '"'), tmpPartName + underscore + supValue);
			}
			finally {
				// Deleting the temporary subcorpus, even if the recursion failed.
				// TODO: bug: corpus.dropSubcorpus(tmpSubcorpus) does not remove the subcorpus (maybe because
				// it must not contain other subcorpora?), whereas delete() works.
				tmpSubcorpus.delete();
			}
		}
	}


	/**
	 * Autonames the partition and stores the result in PARTITION_NAME.
	 * The name is made of "unit_property" segments, then "NOT_unit" segments, separated by dots.
	 *
	 * @param partitionName the beginning of the name, the generated segments are appended to it
	 */
	protected static void autoNamePartition(String partitionName) {

		StringBuilder name = new StringBuilder(partitionName != null ? partitionName : '');

		// Structural units names and properties
		for (int i = 0; i < STRUCTURAL_UNITS.size(); i++) {
			name.append(STRUCTURAL_UNITS[i]).append('_').append(STRUCTURAL_UNITS_PROPERTIES[i]).append('.');
		}

		// Structural units to ignore
		if (STRUCTURAL_UNITS_TO_IGNORE != null) {
			for (int i = 0; i < STRUCTURAL_UNITS_TO_IGNORE.size(); i++) {
				name.append('NOT_').append(STRUCTURAL_UNITS_TO_IGNORE[i]).append('.');
			}
		}

		// Removing last point in name (only if there is one, so an empty name does not fail)
		if (name.length() > 0 && name.charAt(name.length() - 1) == ('.' as char)) {
			name.setLength(name.length() - 1);
		}
		PARTITION_NAME = name.toString();
	}


	/**
	 * Finalizes the queries: wraps each generated condition in brackets, appends the exclusion of the
	 * structural units to ignore and the "expand to" clause, and autonames the partition if needed.
	 */
	protected static void finalizeQueries() {

		String[] unitsToIgnore = (STRUCTURAL_UNITS_TO_IGNORE != null) ? STRUCTURAL_UNITS_TO_IGNORE : new String[0];

		String expandTo = '';
		// Expanding to user defined target
		if (EXPAND_TARGET != null && EXPAND_TARGET != '') {
			expandTo = ' expand to ' + EXPAND_TARGET;
		}
		// Expanding to last child structural unit in user defined hierarchy
		else if (unitsToIgnore.size() == 0 && STRUCTURAL_UNITS != null && STRUCTURAL_UNITS.size() > 0) {
			expandTo = ' expand to ' + STRUCTURAL_UNITS[STRUCTURAL_UNITS.size() - 1];
		}

		// Autonaming the partition
		if (PARTITION_NAME == null || PARTITION_NAME == '') {
			autoNamePartition('');
			// Finalizing partition name
			PARTITION_NAME += expandTo.replace(' expand to', '.EXPAND TO').replace(' ', '_');
			lastAutoName = PARTITION_NAME;
		}

		// The end of the query is the same for every part: build it once
		StringBuilder queryEnd = new StringBuilder();
		// Removing some sections
		for (String sectionToIgnore in unitsToIgnore) {
			queryEnd.append(' & !').append(sectionToIgnore);
		}
		queryEnd.append(']').append(expandTo);
		String suffix = queryEnd.toString();

		// Finalizing queries
		for (int j = 0; j < queries.size(); j++) {
			queries.set(j, '[' + queries.get(j) + suffix);
		}
	}

}
|
350 |
|
|
351 |
|
|
352 |
|
|
353 |
|
|
354 |
|
|
355 |
|
|
356 |
/* |
|
357 |
// Test dialogue de confirmation avant création de la partition |
|
358 |
JFrame frame = new JFrame('test'); |
|
359 |
//frame.setMinimumSize(new Dimension(400, 400)); |
|
360 |
//frame.setVisible(true); |
|
361 |
//frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); |
|
362 |
int n = JOptionPane.showConfirmDialog( |
|
363 |
frame, "Would you like green eggs and ham?", |
|
364 |
"An Inane Question", |
|
365 |
JOptionPane.YES_NO_OPTION); |
|
366 |
if (n == JOptionPane.YES_OPTION) { |
|
367 |
frame.setTitle("Ewww!"); |
|
368 |
} else if (n == JOptionPane.NO_OPTION) { |
|
369 |
frame.setTitle("Me neither!"); |
|
370 |
} else { |
|
371 |
frame.setTitle("Come on -- tell me!"); |
|
372 |
}*/ |
|
373 |
|
|
374 |
|
|
375 |
|
|
376 |
|
|
377 |
|
|
378 |
//// Récupération des unités structurelles et création de la liste des requêtes CQL |
|
379 |
//int i = 0; |
|
380 |
//ArrayList<String> queries = new ArrayList<String>(); |
|
381 |
//ArrayList<String> partNames = new ArrayList<String>(); |
|
382 |
// |
|
383 |
// |
|
384 |
//int currentQueriesCount; |
|
385 |
//if(STRUCTURAL_UNITS.size() > 0 && STRUCTURAL_UNITS.size() == STRUCTURAL_UNITS_PROPERTIES.size()) { |
|
386 |
// |
|
387 |
// // Parcours des unités strcuturelles à traiter |
|
388 |
// for (suName in STRUCTURAL_UNITS) { |
|
389 |
// |
|
390 |
// // Définition auto du nom de partition |
|
391 |
// PARTITION_NAME += suName + '_'; |
|
392 |
// |
|
393 |
// |
|
394 |
// StructuralUnit su = corpus.getStructuralUnit(suName); |
|
395 |
// |
|
396 |
// |
|
397 |
// // TODO : Debug |
|
398 |
// println '' |
|
399 |
// println 'Structural Unit Name: ' + su.getName(); |
|
400 |
// |
|
401 |
// |
|
402 |
// // Récupération des propriétés d'unités structurelles |
|
403 |
// for (StructuralUnitProperty sup in su.getProperties()) { |
|
404 |
// |
|
405 |
// |
|
406 |
// if(STRUCTURAL_UNITS_PROPERTIES[i] != '' && sup.getName() == STRUCTURAL_UNITS_PROPERTIES[i]) { |
|
407 |
// |
|
408 |
// // Définition auto du nom de partition |
|
409 |
// PARTITION_NAME += STRUCTURAL_UNITS_PROPERTIES[i] + "."; |
|
410 |
// |
|
411 |
// // TODO : Debug |
|
412 |
// println "Structural Unit Property: " + sup.getName() + " "; |
|
413 |
// |
|
414 |
// |
|
415 |
// // Récupération des valeurs des propriétés d'unités structurelles |
|
416 |
// |
|
417 |
// //for (supValue in sup.getOrderedValues()) { |
|
418 |
// for (supValue in sup.getOrderedValues()) { |
|
419 |
// |
|
420 |
// // Récupération du sous-corpus lié à la valeur de propriété de structure |
|
421 |
// Subcorpus tmpSubcorpus = corpus.createSubcorpusWithQueryString(su, sup, supValue, "tmp"); |
|
422 |
// |
|
423 |
// String query = suName + '_' + STRUCTURAL_UNITS_PROPERTIES[i] + '="' + supValue + '"'; |
|
424 |
// |
|
425 |
// |
|
426 |
// queries.add(query); |
|
427 |
// partNames.add(supValue); |
|
428 |
// |
|
429 |
// // TODO : Debug |
|
430 |
// print supValue + " "; |
|
431 |
// |
|
432 |
// // Suppression du sous-corpus temporaire |
|
433 |
// corpus.dropSubcorpus(tmpSubcorpus); |
|
434 |
// |
|
435 |
// } |
|
436 |
// |
|
437 |
// } |
|
438 |
// } |
|
439 |
// |
|
440 |
// |
|
441 |
//// // Ajout des niveaux inférieurs |
|
442 |
//// for(int i = 1; i < STRUCTURAL_UNITS.size(); i++) { |
|
443 |
//// |
|
444 |
//// for(int j = 0; j < queries.size(); j++) { |
|
445 |
//// queries.set(j, queries.get(j) + ' & _.' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i] + '=".*"'); |
|
446 |
//// |
|
447 |
//// // Noms de partitions |
|
448 |
//// partNames.set(j, partNames.get(j) + '_' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i]); |
|
449 |
//// } |
|
450 |
//// } |
|
451 |
// |
|
452 |
// |
|
453 |
// |
|
454 |
// |
|
455 |
// |
|
456 |
// |
|
457 |
// i++; |
|
458 |
// // } |
|
459 |
// // else { |
|
460 |
// // |
|
461 |
// // } |
|
462 |
// |
|
463 |
// } |
|
464 |
// |
|
465 |
// |
|
466 |
// |
|
467 |
// |
|
468 |
// |
|
469 |
// // Clôture des requêtes |
|
470 |
// for(int j = 0; j < queries.size(); j++) { |
|
471 |
// |
|
472 |
// String queryEnd = ''; |
|
473 |
// |
|
474 |
// // Suppression de sections |
|
475 |
// for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE) { |
|
476 |
// queryEnd += ' & !' + sectionToIgnore; |
|
477 |
// } |
|
478 |
// |
|
479 |
// queryEnd += ']'; |
|
480 |
// |
|
481 |
// // Expand to target |
|
482 |
// if(EXPAND_TARGET != null && EXPAND_TARGET != '') { |
|
483 |
// queryEnd += ' expand to ' + EXPAND_TARGET; |
|
484 |
// } |
|
485 |
// |
|
486 |
// queries.set(j, '[_.' + queries.get(j) + queryEnd); |
|
487 |
// } |
|
488 |
//} |
|
489 |
|
|
490 |
|
|
491 |
|
|
492 |
|
|
493 |
|
|
494 |
|
|
495 |
|
|
496 |
|
|
497 |
|
|
498 |
|
|
499 |
|
|
500 |
|
|
501 |
|
|
502 |
|
|
503 |
|
|
504 |
|
|
505 |
////for (suName in STRUCTURAL_UNITS) { |
|
506 |
//if(STRUCTURAL_UNITS.size() > 0 && STRUCTURAL_UNITS.size() == STRUCTURAL_UNITS_PROPERTIES.size()) { |
|
507 |
// |
|
508 |
// suName = STRUCTURAL_UNITS[0] |
|
509 |
// //if(STRUCTURAL_UNITS.size() > 1) { |
|
510 |
// |
|
511 |
// // Définition auto du nom de partition |
|
512 |
// PARTITION_NAME += suName + '_'; |
|
513 |
// |
|
514 |
// |
|
515 |
// // TODO : Debug |
|
516 |
// println '' |
|
517 |
// println "Structural Unit Name: $suName" |
|
518 |
// |
|
519 |
// // Récupération des valeurs des propriétés d'unités structurelles |
|
520 |
// for (sup in discours.getStructuralUnitProperties(suName.asType(String))) { |
|
521 |
// |
|
522 |
// supName = STRUCTURAL_UNITS_PROPERTIES[0]; |
|
523 |
// |
|
524 |
// if(supName != '' && sup.getName() == supName) { |
|
525 |
// |
|
526 |
// // Définition auto du nom de partition |
|
527 |
// PARTITION_NAME += supName + "."; |
|
528 |
// |
|
529 |
// // TODO : Debug |
|
530 |
// println "Structural Unit Property: " + supName + " "; |
|
531 |
// |
|
532 |
// for (supValue in sup.getOrderedValues()) { |
|
533 |
// |
|
534 |
// String query = '[_.' + suName + '_' + supName + '="' + supValue + '"'; |
|
535 |
// |
|
536 |
// // Suppression de sections |
|
537 |
// for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE) { |
|
538 |
// query += ' & !' + sectionToIgnore; |
|
539 |
// } |
|
540 |
// |
|
541 |
// |
|
542 |
// |
|
543 |
// queries.add(query); |
|
544 |
// partNames.add(supValue); |
|
545 |
// |
|
546 |
// // TODO : Debug |
|
547 |
// print supValue + " "; |
|
548 |
// } |
|
549 |
// } |
|
550 |
// } |
|
551 |
// |
|
552 |
// |
|
553 |
// // Ajout des niveaux inférieurs |
|
554 |
// for(int i = 1; i < STRUCTURAL_UNITS.size(); i++) { |
|
555 |
// |
|
556 |
// for(int j = 0; j < queries.size(); j++) { |
|
557 |
// queries.set(j, queries.get(j) + ' & _.' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i] + '=".*"'); |
|
558 |
// |
|
559 |
// // Noms de partitions |
|
560 |
// partNames.set(j, partNames.get(j) + '_' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i]); |
|
561 |
// } |
|
562 |
// } |
|
563 |
// |
|
564 |
// |
|
565 |
// // Clôture des requêtes |
|
566 |
// for(int j = 0; j < queries.size(); j++) { |
|
567 |
// |
|
568 |
// String queryEnd = ']'; |
|
569 |
// |
|
570 |
// // Expand to target |
|
571 |
// if(EXPAND_TARGET != null && EXPAND_TARGET != '') { |
|
572 |
// queryEnd += ' expand to ' + EXPAND_TARGET; |
|
573 |
// } |
|
574 |
// |
|
575 |
// queries.set(j, queries.get(j) + queryEnd); |
|
576 |
// } |
|
577 |
// |
|
578 |
// |
|
579 |
// |
|
580 |
// //i++; |
|
581 |
//// } |
|
582 |
//// else { |
|
583 |
//// |
|
584 |
//// } |
|
585 |
// |
|
586 |
////} |
|
587 |
//} |
|
588 |
|
|
589 |
|
|
590 |
|
|
591 |
// Fonctionnelle pour 1 niveau de hiérarchie |
|
592 |
//// Récupération des unités structurelles et création de la liste des requêtes CQL |
|
593 |
//int i = 0; |
|
594 |
//String supName; |
|
595 |
//ArrayList queries = new ArrayList(); |
|
596 |
//ArrayList partNames = new ArrayList(); |
|
597 |
// |
|
598 |
//for (suName in STRUCTURAL_UNITS) { |
|
599 |
// |
|
600 |
// |
|
601 |
// // Définition auto du nom de partition |
|
602 |
// NAME += suName + '_'; |
|
603 |
// |
|
604 |
// |
|
605 |
// |
|
606 |
// println "Structural Unit Name: $suName" |
|
607 |
// |
|
608 |
// // Récupération des valeurs des propriétés d'unités structurelles |
|
609 |
// for (sup in discours.getStructuralUnitProperties(suName.asType(String))) { |
|
610 |
// |
|
611 |
// supName = STRUCTURAL_UNITS_PROPERTIES[i]; |
|
612 |
// |
|
613 |
// if(supName != '' && sup.getName() == supName) { |
|
614 |
// |
|
615 |
// // Définition auto du nom de partition |
|
616 |
// NAME += supName + "."; |
|
617 |
// |
|
618 |
// // TODO : Debug |
|
619 |
// println "Structural Unit Property: " + supName + " "; |
|
620 |
// |
|
621 |
// for (supValue in sup.getOrderedValues()) { |
|
622 |
// |
|
623 |
// String query = '[_.' + suName + '_' + supName + '="' + supValue + '"'; |
|
624 |
// |
|
625 |
// // Suppression de sections |
|
626 |
// for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE) { |
|
627 |
// query += ' & !' + sectionToIgnore; |
|
628 |
// } |
|
629 |
// |
|
630 |
// query += ']'; |
|
631 |
// |
|
632 |
// // Expand to target |
|
633 |
// if(EXPAND_TARGET != null && EXPAND_TARGET != '') { |
|
634 |
// query += ' expand to ' + EXPAND_TARGET; |
|
635 |
// } |
|
636 |
// |
|
637 |
// |
|
638 |
// queries.add(query); |
|
639 |
// partNames.add(supValue); |
|
640 |
// |
|
641 |
// // TODO : Debug |
|
642 |
// print supValue + " "; |
|
643 |
// } |
|
644 |
// } |
|
645 |
// } |
|
646 |
// i++; |
|
647 |
//} |
|
648 |
|
|
649 |
|
|
650 |
|
|
651 |
|
|
652 |
//// start |
|
653 |
//if (queries.size() == partnames.size()) { |
|
654 |
//// def discours = CorpusManager.getCorpusManager().getCorpus(CORPUS) |
|
655 |
// def partition; |
|
656 |
// if (SUBCORPUS == null) { |
|
657 |
// println "partition build with $discours" |
|
658 |
// partition = discours.createPartition(NAME, queries.as, PARTNAMES) |
|
659 |
// } else { |
|
660 |
// def subcorpus |
|
661 |
// if (SUBCORPUSQUERY == null) { |
|
662 |
// println "partition build with subcorpus by name $SUBCORPUS" |
|
663 |
// subcorpus = discours.getSubcorpusByName(SUBCORPUS) |
|
664 |
// if (subcorpus == null) { |
|
665 |
// println "Error: SUBCORPUS NOT FOUND with name : $SUBCORPUS" |
|
666 |
// } |
|
667 |
// } else { |
|
668 |
// println "partition build with subcorpus with query $SUBCORPUSQUERY" |
|
669 |
// subcorpus = discours.createSubcorpus(new Query(SUBCORPUSQUERY), SUBCORPUS) |
|
670 |
// } |
|
671 |
// partition = subcorpus.createPartition(NAME, QUERIES, PARTNAMES) |
|
672 |
// } |
|
673 |
// |
|
674 |
// def list = Arrays.asList(partition.getPartSizes()) |
|
675 |
// println "Partition created $NAME: "+list+" parts" |
|
676 |
// // println "Total size: "+list.sum()+" - is equal to (sub)corpus size : "+(list.sum() == partition.getCorpus().getSize()) |
|
677 |
// println "Total size: "+list.sum()+" - (sub)corpus size : "+(partition.getCorpus().getSize()) |
|
678 |
// |
|
679 |
// monitor.syncExec(new Runnable() { |
|
680 |
// public void run() { |
|
681 |
// CorporaView.refresh(); |
|
682 |
// CorporaView.expand(partition.getParent()); |
|
683 |
// } |
|
684 |
//}); |
|
685 |
//} else { |
|
686 |
// println "QUERIES.size() != PARTNAMES.size() = "+QUERIES.size()+" != "+PARTNAMES.size() |
|
687 |
//} |
|
688 |
|
|
689 |
|
|
690 |
|
|
691 |
|
|
692 |
// parameters |
|
693 |
//def CORPUS_NAME = "LIVRETOPERA10TEXTES" |
|
694 |
////def SUBCORPUS = "NOMDUSOUSCORPUS" |
|
695 |
//def SUBCORPUS = null // si partition sur corpus |
|
696 |
////def SUBCORPUSQUERY = '"je" expand to s' |
|
697 |
//def SUBCORPUSQUERY = null // si sous-corpus par nom |
|
698 |
|
|
699 |
|
|
700 |
|
|
701 |
|
|
702 |
|
|
703 |
|
|
704 |
//def TEXT = "01_ACHILLE ET POLYXENE" |
|
705 |
// def TEXT = "02_ZEPHIRE ET FLORE" |
|
706 |
/*def QUERIES = [ |
|
707 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="1" & _.sp_who=".*"]+</lg>', |
|
708 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="2" & _.sp_who=".*"]+</lg>', |
|
709 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="3" & _.sp_who=".*"]+</lg>', |
|
710 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="4" & _.sp_who=".*"]+</lg>', |
|
711 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="5" & _.sp_who=".*"]+</lg>', |
|
712 |
] |
|
713 |
def PARTNAMES = [ |
|
714 |
"acte_1", |
|
715 |
"acte_2", |
|
716 |
"acte_3", |
|
717 |
"acte_4", |
|
718 |
"acte_5", |
|
719 |
]*/ |
tmp/org.txm.groovy.core/src/groovy/tests/tests_cql.groovy (revision 321) | ||
---|---|---|
1 |
package tests |
|
2 |
import java.util.UUID; |
|
3 |
|
|
4 |
import org.txm.Toolbox |
|
5 |
import org.txm.searchengine.cqp.MemCqiClient |
|
6 |
import org.txm.searchengine.cqp.MemCqiServer |
|
7 |
import org.txm.searchengine.cqp.corpus.* |
|
8 |
import org.txm.functions.ReferencePattern |
|
9 |
import org.txm.rcpapplication.views.* |
|
10 |
import org.txm.searchengine.cqp.corpus.query.Query |
|
11 |
|
|
12 |
// Debug |
|
13 |
println "******************************** Starting ********************************************"; |
|
14 |
|
|
15 |
|
|
16 |
// Tests suite de requêtes directement sur le serveur et création manuelle du sous-corpus |
|
17 |
|
|
18 |
// parameters |
|
19 |
def CORPUS = "LIVRETOPERA10TEXTES" |
|
20 |
//def SUBCORPUS = "NOMDUSOUSCORPUS" |
|
21 |
// def SUBCORPUS = null // si partition sur corpus |
|
22 |
//def SUBCORPUSQUERY = '"je" expand to s' |
|
23 |
//def SUBCORPUSQUERY = null // si sous-corpus par nom |
|
24 |
|
|
25 |
|
|
26 |
def discours = CorpusManager.getCorpusManager().getCorpus(CORPUS) |
|
27 |
def subcorpusCqpId = 'S' + UUID.randomUUID().toString(); |
|
28 |
|
|
29 |
try {
	MemCqiServer cqiServer = (MemCqiServer) Toolbox.getCqiServer()

	// CQP commands sent to the server, in order:
	//  - activate the corpus
	//  - display options ('show +...' displays the XML tags in the result lists)
	//  - build the test subcorpus, activate it and print its content
	def commands = [
		CORPUS + ";",
		'show cd;',
		'show +div1 +div2 +sp +lg +l +speaker;',
		subcorpusCqpId + ' = [div1 & !speaker] expand to lg;',
		subcorpusCqpId + ';',
		"cat " + subcorpusCqpId + ";"
	]
	for (String command in commands) {
		cqiServer.query(command)
	}

	// Other commands tried during the experiments, kept for reference:
	//   'show corpora;'
	//   "info " + CORPUS + ";"
	//   client.query(SUBCORPUSQUERY)
	//   discarding result variables: "discard A;" / "discard B;" / "discard C;"
	//   'C = /region[sp,a]::a.sp_who="IPHIS";'
	//   several commands on one line do not work in server mode:
	//     'A = /region[sp]; B = /region[speaker]; C = difference B A;'
	//   'A = /region[sp];' then 'B = /region[speaker];' then 'C = difference A B;'
	//   'set ShowTagAttributes on;'
	//   'A = /region[sp,a]::a.sp_who="THALIE";' then 'B = /region[speaker];' then subcorpusCqpId + ' = diff A B;'
	//   subcorpusCqpId + ' = B;'
	//   subcorpusCqpId + ' = <sp>[(_.) != "speaker"]+</sp>;'
	//   subcorpusCqpId + ' = <div1>[]*<sp>[]+</sp>[]*</div1>;'
	//   subcorpusCqpId + ' = <div1>[!speaker]*</div1>;'
	//   subcorpusCqpId + ' = <lg>[_.div1_n=".*" & _.sp_who="THALIE"]+</lg>;'
	//   subcorpusCqpId + ' = /region[i];'
	//   'set MatchingStrategy shortest;'   (standard, shortest, longest, traditional)
	//   subcorpusCqpId + ' = [div1 & _.sp_who="THALIE" & !speaker];'
	//   subcorpusCqpId + ' = [_.div1_name=".*" & !speaker];'
	//   subcorpusCqpId + ' = [_.div1_name=".*" & !speaker] expand to lg;'
	//   'group ' + subcorpusCqpId + ' match word;'
	//   subcorpusCqpId + ' = [] !;'
	//   subcorpusCqpId + ' = [] expand to div1 & !speaker;'
	//   subcorpusCqpId + ' = [div1 & !speaker];'
	//   subcorpusCqpId + ' = "fin" [];'
	//   "cat " + subcorpusCqpId + " 0 10;"
	//   server.cqpQuery(discours.getCqpId(), 'subcorpusid', SUBCORPUSQUERY)

	// Printing the last errors; a failure while fetching one is printed and does not stop the script
	def printLastError = { String label, Closure fetchError ->
		try {
			System.out.println(label + fetchError())
		} catch (Exception e) {
			System.out.println("Exception : " + e)
		}
	}
	printLastError("client.getLastCqiError() : ", { cqiServer.getLastCqiError() })
	printLastError("client.getLastCQPError() : ", { cqiServer.getLastCQPError() })

	// Manual creation of the subcorpus (disabled):
	//   Subcorpus subcorpus = new Subcorpus(subcorpusCqpId, "subcorpus" + subcorpusCqpId.toString().substring(0, 5), discours, new Query());
	//   FIXME: is it a problem to pass an empty Query() here?
	//   discours.subcorpora.add(subcorpus);
	//   subcorpus.registerToParent();

	// Manual creation of a partition (disabled):
	//   subcorpus.createPartition(NAME, QUERIES, PARTNAMES)
}
catch (Exception e) {
	System.out.println("Exception : " + e)
}
|
143 |
|
|
144 |
|
|
145 |
|
|
146 |
// Refresh de l'interface |
|
147 |
monitor.syncExec(new Runnable() { |
|
148 |
public void run() { |
|
149 |
CorporaView.refresh(); |
|
150 |
//CorporaView.expand(partition.getParent()); |
|
151 |
} |
|
152 |
}); |
|
153 |
|
|
154 |
|
|
155 |
|
|
156 |
|
|
157 |
|
|
158 |
|
|
159 |
//// parameters |
|
160 |
//def CORPUS = "LIVRETOPERA10TEXTES" |
|
161 |
////def SUBCORPUS = "NOMDUSOUSCORPUS" |
|
162 |
//def SUBCORPUS = null // si partition sur corpus |
|
163 |
////def SUBCORPUSQUERY = '"je" expand to s' |
|
164 |
//def SUBCORPUSQUERY = null // si sous-corpus par nom |
|
165 |
// |
|
166 |
// |
|
167 |
// |
|
168 |
//// Liste des unités structurelles |
|
169 |
////def HIERARCHY = ['text_id', 'div1_n', 'sp_who', 'lg']; |
|
170 |
//def STRUCTURAL_UNITS = ['text', 'div1', 'sp']; |
|
171 |
//def STRUCTURAL_UNITS_PROPERTIES = ['id', 'n', 'who']; |
|
172 |
////def TARGET = 'lg'; |
|
173 |
// |
|
174 |
//String NAME = ''; |
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
|
179 |
//def TEXT = "01_ACHILLE ET POLYXENE" |
|
180 |
// def TEXT = "02_ZEPHIRE ET FLORE" |
|
181 |
/*def QUERIES = [ |
|
182 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="1" & _.sp_who=".*"]+</lg>', |
|
183 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="2" & _.sp_who=".*"]+</lg>', |
|
184 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="3" & _.sp_who=".*"]+</lg>', |
|
185 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="4" & _.sp_who=".*"]+</lg>', |
|
186 |
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="5" & _.sp_who=".*"]+</lg>', |
|
187 |
] |
|
188 |
def PARTNAMES = [ |
|
189 |
"acte_1", |
|
190 |
"acte_2", |
|
191 |
"acte_3", |
|
192 |
"acte_4", |
|
193 |
"acte_5", |
|
194 |
]*/ |
|
195 |
|
|
196 |
|
|
197 |
//def discours = CorpusManager.getCorpusManager().getCorpus(CORPUS) |
|
198 |
// |
|
199 |
// |
|
200 |
//// Récupération des unités structurelles |
|
201 |
//def i = 0; |
|
202 |
//def supName; |
|
203 |
//def QUERIES; |
|
204 |
//def query; |
|
205 |
// |
|
206 |
//for (suName in STRUCTURAL_UNITS) { |
|
207 |
// |
|
208 |
// |
|
209 |
// // Définition auto du nom de partition |
|
210 |
// NAME += suName + '_'; |
|
211 |
// |
|
212 |
// //query = ''; |
|
213 |
// |
|
214 |
// |
|
215 |
// println "Structural Unit Name: $suName" |
|
216 |
// |
|
217 |
// // Récupération des valeurs des propriétés d'unités structurelles |
|
218 |
// for (sup in discours.getStructuralUnitProperties(suName.asType(String))) { |
|
219 |
// |
|
220 |
// supName = STRUCTURAL_UNITS_PROPERTIES[i]; |
|
221 |
// |
|
222 |
// if(supName != '' && sup.getName() == supName) { |
|
223 |
// |
|
224 |
// // Définition auto du nom de partition |
|
225 |
// NAME += supName + "."; |
|
226 |
// |
|
227 |
// println "Structural Unit Property: $supName"; |
|
228 |
// |
|
229 |
// for (supValue in sup.getOrderedValues()) { |
|
230 |
// |
|
231 |
// //query += '<' + TARGET + '>' + supName; |
|
232 |
// |
|
233 |
// |
|
234 |
// print "$supValue " |
|
235 |
// } |
|
236 |
// } |
|
237 |
// } |
|
238 |
// i++; |
|
239 |
//} |
|
240 |
// |
|
241 |
//// Suppression du dernier underscore ou point |
|
242 |
//NAME = NAME.substring(0, NAME.length() - 1); |
|
243 |
//// Debug |
|
244 |
//println "NAME: " + NAME; |
|
245 |
|
|
246 |
|
|
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
|
|
253 |
// start |
|
254 |
/*if (QUERIES.size() == PARTNAMES.size()) { |
|
255 |
|
|
256 |
def partition; |
|
257 |
|
|
258 |
|
|
259 |
// Test unités structurelles |
|
260 |
for (t in discours.getStructuralUnits()) { |
|
261 |
println "Structural Unit: $t" |
|
262 |
|
|
263 |
// Test propriétés d'unités structurelles |
|
264 |
for (g in discours.getStructuralUnitProperties(t.asType(String))) { |
|
265 |
|
|
266 |
println "\tStructural Unit Property: $g" |
|
267 |
|
|
268 |
// Test valeurs des propriétés d'unités structurelles |
|
269 |
for (h in g.getOrderedValues()) { |
|
270 |
|
|
271 |
print "$h, " |
|
272 |
|
|
273 |
} |
|
274 |
|
|
275 |
} |
|
276 |
|
|
277 |
} |
|
278 |
|
|
279 |
|
|
280 |
// Test propriétés |
|
281 |
for (t in discours.getProperties()) { |
|
282 |
println "Property: $t" |
|
283 |
} |
|
284 |
|
|
285 |
|
|
286 |
if (SUBCORPUS == null) { |
|
287 |
println "partition build with $discours" |
|
288 |
partition = discours.createPartition(NAME, QUERIES, PARTNAMES) |
|
289 |
} else { |
|
290 |
def subcorpus |
|
291 |
if (SUBCORPUSQUERY == null) { |
|
292 |
println "partition build with subcorpus by name $SUBCORPUS" |
|
293 |
subcorpus = discours.getSubcorpusByName(SUBCORPUS) |
|
294 |
if (subcorpus == null) { |
|
295 |
println "Error: SUBCORPUS NOT FOUND with name : $SUBCORPUS" |
|
296 |
} |
|
297 |
} else { |
|
298 |
println "partition build with subcorpus with query $SUBCORPUSQUERY" |
|
299 |
subcorpus = discours.createSubcorpus(new Query(SUBCORPUSQUERY), SUBCORPUS) |
|
300 |
} |
|
301 |
partition = subcorpus.createPartition(NAME, QUERIES, PARTNAMES) |
|
302 |
} |
|
303 |
|
|
304 |
def list = Arrays.asList(partition.getPartSizes()) |
|
305 |
println "Partition created $NAME: "+list+" parts" |
|
306 |
// println "Total size: "+list.sum()+" - is equal to (sub)corpus size : "+(list.sum() == partition.getCorpus().getSize()) |
|
307 |
println "Total size: "+list.sum()+" - (sub)corpus size : "+(partition.getCorpus().getSize()) |
|
308 |
|
|
309 |
monitor.syncExec(new Runnable() { |
|
310 |
public void run() { |
|
311 |
CorporaView.refresh(); |
|
312 |
CorporaView.expand(partition.getParent()); |
|
313 |
} |
|
314 |
}); |
|
315 |
} else { |
|
316 |
println "QUERIES.size() != PARTNAMES.size() = "+QUERIES.size()+" != "+PARTNAMES.size() |
|
317 |
}*/ |
|
318 |
|
tmp/org.txm.groovy.core/src/groovy/tests/tests_groovy.groovy (revision 321) | ||
---|---|---|
1 |
print(Progression5) |
tmp/org.txm.groovy.core/src/groovy/org/txm/svn/package.html (revision 321) | ||
---|---|---|
1 |
<html> |
|
2 |
<body> |
|
3 |
<p>Contains Groovy scripts used to manipulate SVN repositories</p> |
|
4 |
</body> |
|
5 |
</html> |
|
0 | 6 |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/tigersearch/InjectAnnotations.groovy (revision 321) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (Tue, 24 Jan 2017) $ |
|
25 |
// $LastChangedRevision: 3400 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.scripts.tigersearch; |
|
29 |
|
|
30 |
import java.text.DateFormat; |
|
31 |
import java.util.Date; |
|
32 |
import java.util.ArrayList; |
|
33 |
import javax.xml.stream.*; |
|
34 |
import java.net.URL; |
|
35 |
import org.txm.importer.filters.*; |
|
36 |
|
|
37 |
// TODO: Auto-generated Javadoc |
|
38 |
/** |
|
39 |
* The Class InjectAnnotations. |
|
40 |
* |
|
41 |
* @author mdecorde |
|
42 |
* |
|
43 |
* inject annotations into ONE file |
|
44 |
*/ |
|
45 |
|
|
46 |
public class InjectAnnotations { |
|
47 |
|
|
48 |
/** The url. */ |
|
49 |
private def url; |
|
50 |
|
|
51 |
/** The input data. */ |
|
52 |
private def inputData; |
|
53 |
|
|
54 |
/** The factory. */ |
|
55 |
private def factory; |
|
56 |
|
|
57 |
/** The parser. */ |
|
58 |
private XMLStreamReader parser; |
|
59 |
|
|
60 |
/** The reader. */ |
|
61 |
private Reader reader; |
|
62 |
|
|
63 |
/** The output. */ |
|
64 |
private def output; |
|
65 |
|
|
66 |
/** The solotags. */ |
|
67 |
ArrayList<String> solotags; |
|
68 |
|
|
69 |
/** The lespos. */ |
|
70 |
HashSet<String> lespos = new HashSet<String>(); |
|
71 |
|
|
72 |
/**
 * Opens the XML document and the annotation file that {@link #process(File)}
 * reads from.
 *
 * Failures while opening either input are reported on standard output and
 * not rethrown, so the instance may be left partially initialised.
 *
 * @param url the location of the XML document to read
 * @param annotations the annotation file, read line by line
 * @param solotags local names of the elements written as self-closing tags
 */
public InjectAnnotations(URL url, File annotations,
		ArrayList<String> solotags) {
	try {
		this.url = url;
		this.solotags = solotags;
		inputData = url.openStream();
		factory = XMLInputFactory.newInstance();
		parser = factory.createXMLStreamReader(inputData);

		// Wrapped in a BufferedReader so that readLine() is the native implementation.
		reader = new BufferedReader(new FileReader(annotations));

	} catch (XMLStreamException ex) {
		System.out.println(ex);
	} catch (IOException ex) {
		// Name the inputs and the cause: the message used to stop after "parsing ".
		System.out.println("IOException while parsing " + url
				+ " with annotations " + annotations + " : " + ex);
	}
}
|
96 |
|
|
97 |
/**
 * Opens the result file and keeps the UTF-8 writer in the {@code output} field.
 *
 * @param outfile the file that will receive the result
 * @return true when the writer could be opened; false when opening it raised
 *         an exception, whose localized message is printed on standard output
 */
private boolean createOutput(File outfile) {
	boolean opened = false;
	try {
		FileOutputStream stream = new FileOutputStream(outfile);
		output = new OutputStreamWriter(stream, "UTF-8");
		opened = true;
	} catch (Exception failure) {
		System.out.println(failure.getLocalizedMessage());
	}
	return opened;
}
|
114 |
|
|
115 |
/**
 * Reads the next annotation from the annotation file.
 *
 * Lines starting with "&lt;" are skipped. The annotation is the second
 * tab-separated column of the first remaining line; it is also recorded in
 * {@code lespos}.
 *
 * @return the annotation value
 * @throws IllegalStateException if the file ends before an annotation line is
 *         found, or if that line has no second column
 */
private String getNextAnnotation() {
	String line = reader.readLine();
	// readLine() yields null at end of file: stop skipping instead of dereferencing null.
	while (line != null && line.startsWith("<")) {
		line = reader.readLine();
	}
	if (line == null) {
		throw new IllegalStateException("No annotation left in the annotation file");
	}

	String[] columns = line.split("\t");
	if (columns.length < 2) {
		throw new IllegalStateException("Annotation line has no second column: '" + line + "'");
	}

	String annotation = columns[1];
	lespos.add(annotation);
	return annotation;
}
|
127 |
|
|
128 |
/**
 * Copies the XML document to {@code outfile}, adding a {@code pos} attribute,
 * taken from the annotation file, to every {@code t} element.
 *
 * Elements whose local name is listed in {@code solotags} are written as
 * self-closing tags. Character data is trimmed, and text and attribute values
 * are XML-escaped so that the result stays well-formed.
 *
 * @param outfile the file to write the result to
 * @return true if the whole document was written; false if the output could
 *         not be created or an XML/IO error interrupted the copy
 */
public boolean process(File outfile) {
	if (!createOutput(outfile)) {
		return false;
	}

	String lastopenlocalname = "";
	try {
		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
			if (event == XMLStreamConstants.START_ELEMENT) {
				lastopenlocalname = parser.getLocalName();
				writeStartTag(lastopenlocalname);
			} else if (event == XMLStreamConstants.END_ELEMENT) {
				writeEndTag(parser.getLocalName(), lastopenlocalname);
			} else if (event == XMLStreamConstants.CHARACTERS) {
				output.write(escapeXml(parser.getText().trim()));
			}
		}
		// Close here so that a failing flush is reported as a failure, not as success.
		output.close();
		return true;
	} catch (XMLStreamException ex) {
		System.out.println(ex);
	} catch (IOException ex) {
		System.out.println("IOException while parsing " + inputData);
	} finally {
		closeInputsAndOutput();
	}
	return false;
}

/** Writes the opening (or self-closing) tag of the current element, with its attributes. */
private void writeStartTag(String localname) {
	output.write("\n<" + tagPrefix(parser.getPrefix()) + localname);

	for (int i = 0; i < parser.getAttributeCount(); i++) {
		output.write(" " + parser.getAttributeLocalName(i)
				+ "=\"" + escapeXml(parser.getAttributeValue(i)) + "\"");
	}

	// "t" elements receive the next annotation of the annotation file.
	if (localname.equals("t")) {
		output.write(" pos=\"" + escapeXml(getNextAnnotation()) + "\"");
	}

	output.write(solotags.contains(localname) ? "/>" : ">");
}

/** Writes the closing tag of the current element unless it was written self-closed. */
private void writeEndTag(String localname, String lastopenlocalname) {
	if (solotags.contains(localname)) {
		return;
	}
	String closing = "</" + tagPrefix(parser.getPrefix()) + localname + ">";
	// The closing tag stays on the same line only when it closes the last opened element.
	output.write(lastopenlocalname.equals(localname) ? closing : "\n" + closing);
}

/** Returns the namespace prefix followed by ":", or an empty string when there is none. */
private static String tagPrefix(String prefix) {
	return (prefix == null || prefix.isEmpty()) ? "" : prefix + ":";
}

/** Escapes the characters that cannot appear verbatim in XML text or attribute values. */
private static String escapeXml(String value) {
	if (value == null) {
		return "";
	}
	return value.replace("&", "&amp;")
			.replace("<", "&lt;")
			.replace(">", "&gt;")
			.replace("\"", "&quot;");
}

/** Releases the writer and the parser; errors raised while closing are only reported. */
private void closeInputsAndOutput() {
	try {
		output.close();
	} catch (IOException ex) {
		System.out.println("IOException while closing the output : " + ex);
	}
	try {
		parser.close();
	} catch (XMLStreamException ex) {
		System.out.println(ex);
	}
}
|
207 |
|
|
208 |
/**
 * Writes to {@code f}, in UTF-8, a {@code <feature name="pos" domain="T">}
 * declaration containing one {@code <value>} element per annotation value
 * collected in {@code lespos}.
 *
 * @param f the file to write the declaration to
 */
public void getFeature(File f)
{
	Writer writer = new OutputStreamWriter(new FileOutputStream(f) , "UTF-8");
	try {
		writer.write("<feature name=\"pos\" domain=\"T\">\n")
		for (String pos : lespos) {
			writer.write("<value name=\""+pos+"\"></value>\n");
		}
		writer.write("</feature>\n")
	} finally {
		// Release the file even when a write fails.
		writer.close();
	}
}
|
223 |
|
|
224 |
/**
 * Sample run: injects the annotations of {@code result.tt} into
 * {@code beroul.xml} and writes {@code beroul-result.xml}, all located in
 * {@code xml/beroul} under the user's home directory.
 *
 * @param args unused
 */
public static void main(String[] args) {

	// "~" is a shell shortcut that java.io.File does not expand: resolve the home directory explicitly.
	File rootDir = new File(System.getProperty("user.home"), "xml/beroul");
	new File(rootDir, "identity").mkdir();

	// the tags that must stay milestones (self-closing)
	ArrayList<String> milestones = new ArrayList<String>();
	milestones.add("tagUsage");
	milestones.add("pb");
	milestones.add("lb");
	milestones.add("catRef");

	File srcfile = new File(rootDir, "beroul.xml");
	File annotationsfiles = new File(rootDir, "result.tt");
	File resultfile = new File(rootDir, "beroul-result.xml");
	println("identity file : " + srcfile + " to : " + resultfile);

	// File.toURL() is deprecated and does not escape special characters; go through the URI.
	def builder = new InjectAnnotations(srcfile.toURI().toURL(), annotationsfiles,
			milestones);
	builder.process(resultfile);
}
|
254 |
|
|
255 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/tigersearch/package.html (revision 321) | ||
---|---|---|
1 |
<html> |
|
2 |
<body> |
|
3 |
<p>Manage tigerSearch source file</p> |
|
4 |
</body> |
|
5 |
</html> |
|
0 | 6 |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/tigersearch/InsertAnnotationsBeroul.groovy (revision 321) | ||
---|---|---|
1 |
/** |
|
2 |
* Main. |
|
3 |
* |
|
4 |
* @param args the args |
|
5 |
*/ |
|
6 |
// Copyright © 2010-2013 ENS de Lyon. |
|
7 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
8 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
9 |
// Sophia Antipolis, University of Paris 3. |
|
10 |
// |
|
11 |
// The TXM platform is free software: you can redistribute it |
|
12 |
// and/or modify it under the terms of the GNU General Public |
|
13 |
// License as published by the Free Software Foundation, |
|
14 |
// either version 2 of the License, or (at your option) any |
|
15 |
// later version. |
|
16 |
// |
|
17 |
// The TXM platform is distributed in the hope that it will be |
|
18 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
19 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
20 |
// PURPOSE. See the GNU General Public License for more |
|
21 |
// details. |
|
22 |
// |
|
23 |
// You should have received a copy of the GNU General |
|
24 |
// Public License along with the TXM platform. If not, see |
|
25 |
// http://www.gnu.org/licenses. |
|
26 |
// |
|
27 |
// |
|
28 |
// |
|
29 |
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun., 06 mai 2013) $ |
|
30 |
// $LastChangedRevision: 2386 $ |
|
31 |
// $LastChangedBy: mdecorde $ |
|
32 |
// |
|
33 |
package org.txm.scripts.tigersearch; |
|
34 |
|
|
35 |
import org.txm.utils.treetagger.TreeTagger; |
|
36 |
import org.txm.scripts.teitxm.*; |
|
37 |
|
|
38 |
// TODO: Auto-generated Javadoc |
|
39 |
/**
 * Script inserting TreeTagger annotations into the beroul file, in three steps:
 * 1. build a TreeTagger source file (beroul.tt) from beroul.xml with BuildTTFile,
 * 2. run TreeTagger with the fro.par model to produce result.tt,
 * 3. inject the annotations of result.tt into beroul.xml, producing
 *    beroul-result.xml, and write the collected values to feature.xml.
 *
 * All files live in xml/fullberoul under the user's home directory.
 */

String home = System.getProperty("user.home")
File rootDir = new File(home, "xml/fullberoul/")

// the tags that must stay milestones (self-closing)
ArrayList<String> milestones = new ArrayList<String>();
milestones.add("tagUsage");
milestones.add("pb");
milestones.add("lb");
milestones.add("catRef");

// transform xml tiger >> TTsrc
File srcfile = new File(rootDir,"beroul.xml");
File resultfile = new File(rootDir,"beroul.tt");
println("xml>>TT from : "+srcfile+" to : "+resultfile );

// File.toURL() is deprecated and does not escape special characters; go through the URI.
def builder = new BuildTTFile(srcfile.toURI().toURL(), milestones);
builder.process(resultfile, "t");

// tag TT
String infile = resultfile.getPath();
String modelfile = home+"/treetagger/models/fro.par";
String outfile = rootDir.getAbsolutePath()+"/result.tt";

println("proj "+modelfile+ " on " +resultfile +" >> "+outfile);

TreeTagger tt = new TreeTagger(home+"/treetagger/bin/");
tt.settoken();
tt.setquiet();
tt.setsgml();
tt.seteostag("<s>");
tt.treetagger( modelfile, infile, outfile)

// inject new TTattributes
File annotationsfiles = new File(rootDir,"result.tt");
File lastresultfile = new File(rootDir,"beroul-result.xml");
// Report the file actually written by this step (it used to print the .tt file of step 1).
println("insert TT annotations : "+srcfile+" to : "+lastresultfile );

builder = new InjectAnnotations(srcfile.toURI().toURL(),annotationsfiles, milestones);
builder.process(lastresultfile);

builder.getFeature(new File(rootDir,"feature.xml"));
|
86 |
/* |
|
87 |
//TAG with TnT |
|
88 |
//need to replace <s> by nothing and </s> by \n |
|
89 |
String encoding = "UTF-8" |
|
90 |
for(String text : texts) |
|
91 |
{ |
|
92 |
//patch src files |
|
93 |
File f = new File(textsDir,text+".t"); |
|
94 |
File temp = new File("tempFileCVScleaner") |
|
95 |
println("patch texts files "+f+": rmv <s> and replace </s>"); |
|
96 |
Reader reader = new InputStreamReader(new FileInputStream(f),encoding); |
|
97 |
Writer writer = new FileWriter(temp); |
|
98 |
reader.eachLine |
|
99 |
{ |
|
100 |
if(it.trim().startsWith("</s")) |
|
101 |
writer.write("\n") |
|
102 |
else if(it.trim().startsWith("<s")) |
|
103 |
writer.write("") |
|
104 |
else |
|
105 |
writer.write(it+"\n") |
|
106 |
} |
|
107 |
reader.close(); |
|
108 |
writer.close(); |
|
109 |
if (!(f.delete() && temp.renameTo(f))) println "Warning can't rename file "+temp+" to "+f |
|
110 |
}*/ |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/tigersearch/SubCorpusToSimpleCorpus.groovy (revision 321) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.scripts.tigersearch; |
|
29 |
|
|
30 |
import org.txm.sw.ReplaceXmlDomNode; |
|
31 |
|
|
32 |
import java.io.File; |
|
33 |
import java.util.ArrayList; |
Formats disponibles : Unified diff