|
1 |
package tests
|
|
2 |
|
|
3 |
import org.txm.rcpapplication.views.*
|
|
4 |
import org.txm.searchengine.cqp.corpus.*
|
|
5 |
|
|
6 |
|
|
7 |
/*
|
|
8 |
import java.awt.Dimension
|
|
9 |
import java.util.UUID;
|
|
10 |
|
|
11 |
import javax.swing.JFrame;
|
|
12 |
import javax.swing.JOptionPane;
|
|
13 |
|
|
14 |
import org.txm.Toolbox
|
|
15 |
import org.txm.searchengine.cqp.MemCqiClient
|
|
16 |
import org.txm.searchengine.cqp.MemCqiServer
|
|
17 |
import org.txm.searchengine.cqp.corpus.*
|
|
18 |
import org.txm.functions.ReferencePattern
|
|
19 |
import org.txm.rcpapplication.views.CorporaView
|
|
20 |
import org.txm.searchengine.cqp.corpus.query.Query
|
|
21 |
*/
|
|
22 |
|
|
23 |
|
|
24 |
/**
|
|
25 |
* Create partition with advanced CQL queries and autoname the parts.
|
|
26 |
* Can create partitions by defining a multi-level structural units hierarchy or by defining several properties values for one structural unit.
|
|
27 |
* Can define some structural units to remove from the resulting parts subcorpus.
|
|
28 |
*
|
|
29 |
*/
|
|
30 |
|
|
31 |
// TODO : Log
println "******************************** Starting ********************************************";


// *************************** User parameters ************************************************************
// Exactly one "Test" configuration below should be active at a time; the others are kept
// commented out as usage examples.


// Test 1: partition that excludes some sections
def CORPUS_NAME = "LIVRETOPERA2" // The name of the corpus to partition

def SUBCORPUS_NAME = null // The name of the subcorpus to partition (null = work on the corpus itself)
//def SUBCORPUS_NAME = "TEST" // The name of the subcorpus to partition if needed
// FIXME: subcorpus management is bugged, when trying to create a CA on a Partition created on a subcorpus, it doesn't work because of the getLexicon() method which throws a CqiCqpErrorErrorGeneral exception

PartitionQueriesGenerator.STRUCTURAL_UNITS = ['sp']; // Applying the partition on these structural units
PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['who']; // Applying the partition on these properties of structural units defined above

PartitionQueriesGenerator.STRUCTURAL_UNITS_TO_IGNORE = ['speaker']; // These structural units will be removed from the partition. NOTE : If doing that you need to define an EXPAND_TARGET
																	// if you want some subcorpus parts that will manage sequential positions queries

PartitionQueriesGenerator.EXPAND_TARGET = 'lg'; // Expand the results to this structural unit parent target. If some structural units to ignore are defined
												// and the expand target is upper on the hierarchy than them, the structural units to ignore WON'T be ignored

//PartitionQueriesGenerator.PARTITION_NAME = 'gugu'; // The partition name. If empty or not defined, the partition will be autonamed regarding of the structural units and properties




// Test 2: partition on a hierarchy (multi-level, 2 levels)
//def CORPUS_NAME = "LIVRETOPERA10TEXTES"
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'div1'];
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['id', 'name'];


// Test 3: partition on a hierarchy (multi-level, 3 levels)
//def CORPUS_NAME = "LIVRETOPERA10TEXTES"
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'div1', 'div2'];
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['id', 'name', 'name'];


// Test 4: partition on a hierarchy (multi-level, 4 levels)
// Adding the 'n' property makes it possible, for example, to sort the parts in scene order; otherwise the
// sort is problematic, e.g. "SCENE II" comes before "SCENE PREMIERE"
//def CORPUS_NAME = "LIVRETOPERA2"
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'div1', 'div2', 'div2'];
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['id', 'name', 'n', 'name'];


// Test 5: crossed partitions (on several properties of the same structural unit)
//def CORPUS_NAME = "DISCOURS"
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'text'];
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['loc', 'type'];



// Tests
//def CORPUS_NAME = "LIVRETOPERA10TEXTES"
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['div1', 'sp'];
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['name', 'who'];

// Tests
//def CORPUS_NAME = "LIVRETOPERA10TEXTES"
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['div1'];
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['n'];
//PartitionQueriesGenerator.PART_NAMES_PREFIX = 'act_';




// *************************** Debug parameters ************************************************************


PartitionQueriesGenerator.DEBUG = 0; // If DEBUG != 0 then partition is not created,
									// script only outputs the created queries and part names strings in console



// *************************** End of parameters ************************************************************
|
|
108 |
|
|
109 |
|
|
110 |
|
|
111 |
|
|
112 |
|
|
113 |
|
|
114 |
|
|
115 |
// Running: generate the queries and create the partition.
// createPartition returns null when the parameters are inconsistent or when DEBUG != 0.
def partition = PartitionQueriesGenerator.createPartition(CORPUS_NAME, SUBCORPUS_NAME);

// Refreshing the RCP component, only when a partition was actually created.
// The refresh goes through monitor.syncExec (presumably to run on the UI thread; confirm against the TXM API).
if(partition != null) {
	monitor.syncExec(new Runnable() {
		public void run() {
			CorporaView.refresh();
			// Unfold the parent of the new partition so that it becomes visible in the view
			CorporaView.expand(partition.getParent());
		}
	});
}
|
|
128 |
|
|
129 |
|
|
130 |
|
|
131 |
|
|
132 |
|
|
133 |
/**
 * Creates the list of CQL queries and part names of a partition from the user-defined
 * structural units, structural unit properties, structural units to ignore and expand target,
 * then creates the partition on the corpus (or subcorpus).
 *
 * All the configuration and the working lists are static: set the public fields, then call
 * {@link #createPartition(String, String)}.
 *
 * @author s
 */
public class PartitionQueriesGenerator {

	/** If DEBUG != 0, the queries are printed but the partition is not created. */
	public static int DEBUG = 0;

	/** Name of the partition; when empty (or null) it is generated from the units, properties and expand target. */
	public static String PARTITION_NAME = '';

	/** Structural units on which the partition is built, e.g. ['text', 'div1']. */
	public static String[] STRUCTURAL_UNITS = [];

	/** Property of each structural unit on which the partition is built, e.g. ['id', 'name']; same length as STRUCTURAL_UNITS. */
	public static String[] STRUCTURAL_UNITS_PROPERTIES = [];

	/** Structural units to ignore, e.g. 'speaker' gives the CQL condition "!speaker". */
	public static String[] STRUCTURAL_UNITS_TO_IGNORE = [];

	/** Prefix put in front of every generated part name. */
	public static String PART_NAMES_PREFIX = '';

	/**
	 * Expand target: widens the matched spans up to the given parent structural unit.
	 * NOTE: the expand conflicts with the units to ignore. If the target is higher in the hierarchy
	 * than the ignored units, they are put back in the CWB results and are therefore not ignored.
	 */
	public static String EXPAND_TARGET = null;

	/** Generated CQL queries, one per part (same order as partNames). */
	public static ArrayList<String> queries = new ArrayList<String>();

	/** Generated part names, one per part (same order as queries). */
	public static ArrayList<String> partNames = new ArrayList<String>();


	/**
	 * Init the generator, build the queries and part names, print them and create the partition.
	 *
	 * @param corpusName the name of the corpus to partition
	 * @param subcorpusName the name of the subcorpus of that corpus to partition, or null to partition the corpus itself
	 * @return the created partition, or null when the configuration is inconsistent, the corpus or
	 *         subcorpus cannot be found, or DEBUG != 0
	 */
	public static Partition createPartition(String corpusName, String subcorpusName) {

		// Each structural unit needs exactly one matching property
		if(STRUCTURAL_UNITS.size() == 0 || STRUCTURAL_UNITS.size() != STRUCTURAL_UNITS_PROPERTIES.size()) {
			// TODO : Log
			println 'Structural units count or structural units properties count error.';
			return null;
		}

		// The working lists are static: drop the results of any previous run, otherwise they accumulate
		queries.clear();
		partNames.clear();

		// TODO : Log
		println '**************************************************************************************************************'
		println 'Creating the queries on corpus "' + corpusName + '"';

		Corpus corpus = CorpusManager.getCorpusManager().getCorpus(corpusName);
		if(corpus == null) {
			println 'Error: corpus not found with name "' + corpusName + '"';
			return null;
		}

		// Subcorpora
		if(subcorpusName != null) {
			corpus = corpus.getSubcorpusByName(subcorpusName);
			if(corpus == null) {
				println 'Error: subcorpus not found with name "' + subcorpusName + '"';
				return null;
			}
		}

		// Recursing through the corpus and subcorpus
		process(corpus, 0, '', '');

		// Finalizing the queries
		finalizeQueries();

		// TODO : Debug
		// Displaying the partition name
		println '';
		println 'PARTITION_NAME: ' + PARTITION_NAME;

		// Displaying the queries
		println 'Queries (count = ' + queries.size() + '):';
		for(query in queries) {
			println query;
		}

		// Displaying the part names
		println 'Partnames (count = ' + partNames.size() + '):';
		for(partName in partNames) {
			print partName + ' / ';
		}
		// Close the line of part names so that the next message starts on its own line
		println '';

		// TODO : Log
		println 'Queries created.';

		// Debug mode: the queries are only displayed
		if(DEBUG != 0) {
			return null;
		}

		// Creating the partition (the two lists are filled in lockstep; the check is a safety net)
		if(queries.size() != partNames.size()) {
			return null;
		}
		return corpus.createPartition(PARTITION_NAME, queries, partNames);
	}


	/**
	 * Recurse through structural units and structural units properties of corpus and create the queries and the part names.
	 * At each level a temporary subcorpus is created for every value of the property, used for the
	 * next level, then deleted.
	 *
	 * NOTE(review): property values are inserted verbatim between double quotes in the CQL query;
	 * values containing a double quote or regular expression metacharacters may need escaping.
	 *
	 * @param corpus the corpus or subcorpus
	 * @param index the index for recursion (position in STRUCTURAL_UNITS)
	 * @param tmpQuery the temporary query for creating subcorpus part
	 * @param tmpPartName the temporary part name of the subcorpus part
	 * @throws IllegalArgumentException if the structural unit or property of the current level does not exist in the corpus
	 */
	protected static void process(Corpus corpus, int index, String tmpQuery, String tmpPartName) {

		// End of array: one complete query / part name pair has been built
		if(index >= STRUCTURAL_UNITS.size()) {
			queries.add(tmpQuery);
			partNames.add(PART_NAMES_PREFIX + tmpPartName);
			return;
		}

		StructuralUnit su = corpus.getStructuralUnit(STRUCTURAL_UNITS[index]);
		if(su == null) {
			throw new IllegalArgumentException('Structural unit "' + STRUCTURAL_UNITS[index] + '" not found in corpus "' + corpus.getName() + '"');
		}
		StructuralUnitProperty sup = su.getProperty(STRUCTURAL_UNITS_PROPERTIES[index]);
		if(sup == null) {
			throw new IllegalArgumentException('Property "' + STRUCTURAL_UNITS_PROPERTIES[index] + '" not found on structural unit "' + STRUCTURAL_UNITS[index] + '"');
		}

		// TODO : Log
		println ''
		if(index == 0) {
			println 'Processing Structural Unit Property "' + sup.getFullName() + '" on mother corpus "' + corpus.getName() + '"';
		}
		else {
			println 'Processing Structural Unit Property "' + sup.getFullName() + '" on subcorpus part "' + tmpPartName + '"';
		}
		println ''

		// Partition conditions and part name separators: only needed once a first level has been written.
		// They do not depend on the property value, so they are computed once, outside of the loop.
		String and = '';
		String underscore = '';
		if(tmpQuery != '') {
			underscore = '_';
			and = ' & ';
		}

		// Creating the queries parts for each structural units properties values
		// TODO : report to Matthieu: passing the subcorpus again should not be required since sup was
		// already obtained from the subcorpus; getOrderedValues() without argument and getValues() are also bugged
		for (supValue in sup.getOrderedValues(corpus)) {

			// TODO : Log
			println ''
			println 'Value "' + supValue + '"';
			println ''

			// Getting the subcorpus linked to the structural unit property value (unique temporary name)
			Subcorpus tmpSubcorpus = corpus.createSubcorpusWithQueryString(su, sup, supValue, "tmp" + UUID.randomUUID());

			try {
				process(tmpSubcorpus, index + 1, (tmpQuery + and + '_.' + sup.getFullName() + '="' + supValue + '"'), tmpPartName + underscore + supValue);
			}
			finally {
				// Deleting the temporary subcorpus, even when a deeper level fails
				// TODO : bug: corpus.dropSubcorpus(tmpSubcorpus) does not remove the subcorpus (probably
				// because it must not contain other subcorpora); delete() does work.
				tmpSubcorpus.delete();
			}
		}
	}


	/**
	 * Autoname the partition: sets PARTITION_NAME to the given prefix followed by one
	 * "unit_property" segment per structural unit and one "NOT_unit" segment per ignored unit,
	 * the segments being separated by '.'.
	 *
	 * @param partitionName the prefix of the generated name (null is treated as empty)
	 */
	protected static void autoNamePartition(String partitionName) {

		List<String> segments = new ArrayList<String>();

		// Structural units names and properties
		for(int i = 0; i < STRUCTURAL_UNITS.size(); i++) {
			segments.add(STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i]);
		}

		// Structural units to ignore
		for(int i = 0; i < STRUCTURAL_UNITS_TO_IGNORE.size(); i++) {
			segments.add('NOT_' + STRUCTURAL_UNITS_TO_IGNORE[i]);
		}

		// Joining avoids having to strip a trailing '.', which failed on an empty name
		PARTITION_NAME = (partitionName == null ? '' : partitionName) + segments.join('.');
	}


	/**
	 * Finalize the queries: autoname the partition when no name was given, then wrap every query
	 * in brackets, adding the conditions excluding the ignored units and the "expand to" clause.
	 */
	protected static void finalizeQueries() {

		String expandTo = '';
		// Expanding to user defined target
		if(EXPAND_TARGET != null && EXPAND_TARGET != '') {
			expandTo = ' expand to ' + EXPAND_TARGET;
		}
		// Expanding to last child structural unit in user defined hierarchy
		else if(STRUCTURAL_UNITS_TO_IGNORE.size() == 0) {
			expandTo = ' expand to ' + STRUCTURAL_UNITS[STRUCTURAL_UNITS.size() - 1];
		}

		// Autonaming the partition
		if(PARTITION_NAME == null || PARTITION_NAME == '') {
			autoNamePartition('');
			// Finalizing partition name, e.g. ' expand to lg' becomes '.EXPAND_TO_lg'
			PARTITION_NAME += expandTo.replace(' expand to', '.EXPAND TO').replace(' ', '_');
		}

		// The end of the query is the same for every part: build it once
		String queryEnd = '';
		// Removing some sections
		for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE) {
			queryEnd += ' & !' + sectionToIgnore;
		}
		queryEnd += ']' + expandTo;

		// Finalizing queries
		for(int j = 0; j < queries.size(); j++) {
			queries.set(j, '[' + queries.get(j) + queryEnd);
		}
	}

}
|
|
350 |
|
|
351 |
|
|
352 |
|
|
353 |
|
|
354 |
|
|
355 |
|
|
356 |
/*
|
|
357 |
// Test dialogue de confirmation avant création de la partition
|
|
358 |
JFrame frame = new JFrame('test');
|
|
359 |
//frame.setMinimumSize(new Dimension(400, 400));
|
|
360 |
//frame.setVisible(true);
|
|
361 |
//frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
|
|
362 |
int n = JOptionPane.showConfirmDialog(
|
|
363 |
frame, "Would you like green eggs and ham?",
|
|
364 |
"An Inane Question",
|
|
365 |
JOptionPane.YES_NO_OPTION);
|
|
366 |
if (n == JOptionPane.YES_OPTION) {
|
|
367 |
frame.setTitle("Ewww!");
|
|
368 |
} else if (n == JOptionPane.NO_OPTION) {
|
|
369 |
frame.setTitle("Me neither!");
|
|
370 |
} else {
|
|
371 |
frame.setTitle("Come on -- tell me!");
|
|
372 |
}*/
|
|
373 |
|
|
374 |
|
|
375 |
|
|
376 |
|
|
377 |
|
|
378 |
//// Récupération des unités structurelles et création de la liste des requêtes CQL
|
|
379 |
//int i = 0;
|
|
380 |
//ArrayList<String> queries = new ArrayList<String>();
|
|
381 |
//ArrayList<String> partNames = new ArrayList<String>();
|
|
382 |
//
|
|
383 |
//
|
|
384 |
//int currentQueriesCount;
|
|
385 |
//if(STRUCTURAL_UNITS.size() > 0 && STRUCTURAL_UNITS.size() == STRUCTURAL_UNITS_PROPERTIES.size()) {
|
|
386 |
//
|
|
387 |
// // Parcours des unités strcuturelles à traiter
|
|
388 |
// for (suName in STRUCTURAL_UNITS) {
|
|
389 |
//
|
|
390 |
// // Définition auto du nom de partition
|
|
391 |
// PARTITION_NAME += suName + '_';
|
|
392 |
//
|
|
393 |
//
|
|
394 |
// StructuralUnit su = corpus.getStructuralUnit(suName);
|
|
395 |
//
|
|
396 |
//
|
|
397 |
// // TODO : Debug
|
|
398 |
// println ''
|
|
399 |
// println 'Structural Unit Name: ' + su.getName();
|
|
400 |
//
|
|
401 |
//
|
|
402 |
// // Récupération des propriétés d'unités structurelles
|
|
403 |
// for (StructuralUnitProperty sup in su.getProperties()) {
|
|
404 |
//
|
|
405 |
//
|
|
406 |
// if(STRUCTURAL_UNITS_PROPERTIES[i] != '' && sup.getName() == STRUCTURAL_UNITS_PROPERTIES[i]) {
|
|
407 |
//
|
|
408 |
// // Définition auto du nom de partition
|
|
409 |
// PARTITION_NAME += STRUCTURAL_UNITS_PROPERTIES[i] + ".";
|
|
410 |
//
|
|
411 |
// // TODO : Debug
|
|
412 |
// println "Structural Unit Property: " + sup.getName() + " ";
|
|
413 |
//
|
|
414 |
//
|
|
415 |
// // Récupération des valeurs des propriétés d'unités structurelles
|
|
416 |
//
|
|
417 |
// //for (supValue in sup.getOrderedValues()) {
|
|
418 |
// for (supValue in sup.getOrderedValues()) {
|
|
419 |
//
|
|
420 |
// // Récupération du sous-corpus lié à la valeur de propriété de structure
|
|
421 |
// Subcorpus tmpSubcorpus = corpus.createSubcorpusWithQueryString(su, sup, supValue, "tmp");
|
|
422 |
//
|
|
423 |
// String query = suName + '_' + STRUCTURAL_UNITS_PROPERTIES[i] + '="' + supValue + '"';
|
|
424 |
//
|
|
425 |
//
|
|
426 |
// queries.add(query);
|
|
427 |
// partNames.add(supValue);
|
|
428 |
//
|
|
429 |
// // TODO : Debug
|
|
430 |
// print supValue + " ";
|
|
431 |
//
|
|
432 |
// // Suppression du sous-corpus temporaire
|
|
433 |
// corpus.dropSubcorpus(tmpSubcorpus);
|
|
434 |
//
|
|
435 |
// }
|
|
436 |
//
|
|
437 |
// }
|
|
438 |
// }
|
|
439 |
//
|
|
440 |
//
|
|
441 |
//// // Ajout des niveaux inférieurs
|
|
442 |
//// for(int i = 1; i < STRUCTURAL_UNITS.size(); i++) {
|
|
443 |
////
|
|
444 |
//// for(int j = 0; j < queries.size(); j++) {
|
|
445 |
//// queries.set(j, queries.get(j) + ' & _.' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i] + '=".*"');
|
|
446 |
////
|
|
447 |
//// // Noms de partitions
|
|
448 |
//// partNames.set(j, partNames.get(j) + '_' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i]);
|
|
449 |
//// }
|
|
450 |
//// }
|
|
451 |
//
|
|
452 |
//
|
|
453 |
//
|
|
454 |
//
|
|
455 |
//
|
|
456 |
//
|
|
457 |
// i++;
|
|
458 |
// // }
|
|
459 |
// // else {
|
|
460 |
// //
|
|
461 |
// // }
|
|
462 |
//
|
|
463 |
// }
|
|
464 |
//
|
|
465 |
//
|
|
466 |
//
|
|
467 |
//
|
|
468 |
//
|
|
469 |
// // Clôture des requêtes
|
|
470 |
// for(int j = 0; j < queries.size(); j++) {
|
|
471 |
//
|
|
472 |
// String queryEnd = '';
|
|
473 |
//
|
|
474 |
// // Suppression de sections
|
|
475 |
// for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE) {
|
|
476 |
// queryEnd += ' & !' + sectionToIgnore;
|
|
477 |
// }
|
|
478 |
//
|
|
479 |
// queryEnd += ']';
|
|
480 |
//
|
|
481 |
// // Expand to target
|
|
482 |
// if(EXPAND_TARGET != null && EXPAND_TARGET != '') {
|
|
483 |
// queryEnd += ' expand to ' + EXPAND_TARGET;
|
|
484 |
// }
|
|
485 |
//
|
|
486 |
// queries.set(j, '[_.' + queries.get(j) + queryEnd);
|
|
487 |
// }
|
|
488 |
//}
|
|
489 |
|
|
490 |
|
|
491 |
|
|
492 |
|
|
493 |
|
|
494 |
|
|
495 |
|
|
496 |
|
|
497 |
|
|
498 |
|
|
499 |
|
|
500 |
|
|
501 |
|
|
502 |
|
|
503 |
|
|
504 |
|
|
505 |
////for (suName in STRUCTURAL_UNITS) {
|
|
506 |
//if(STRUCTURAL_UNITS.size() > 0 && STRUCTURAL_UNITS.size() == STRUCTURAL_UNITS_PROPERTIES.size()) {
|
|
507 |
//
|
|
508 |
// suName = STRUCTURAL_UNITS[0]
|
|
509 |
// //if(STRUCTURAL_UNITS.size() > 1) {
|
|
510 |
//
|
|
511 |
// // Définition auto du nom de partition
|
|
512 |
// PARTITION_NAME += suName + '_';
|
|
513 |
//
|
|
514 |
//
|
|
515 |
// // TODO : Debug
|
|
516 |
// println ''
|
|
517 |
// println "Structural Unit Name: $suName"
|
|
518 |
//
|
|
519 |
// // Récupération des valeurs des propriétés d'unités structurelles
|
|
520 |
// for (sup in discours.getStructuralUnitProperties(suName.asType(String))) {
|
|
521 |
//
|
|
522 |
// supName = STRUCTURAL_UNITS_PROPERTIES[0];
|
|
523 |
//
|
|
524 |
// if(supName != '' && sup.getName() == supName) {
|
|
525 |
//
|
|
526 |
// // Définition auto du nom de partition
|
|
527 |
// PARTITION_NAME += supName + ".";
|
|
528 |
//
|
|
529 |
// // TODO : Debug
|
|
530 |
// println "Structural Unit Property: " + supName + " ";
|
|
531 |
//
|
|
532 |
// for (supValue in sup.getOrderedValues()) {
|
|
533 |
//
|
|
534 |
// String query = '[_.' + suName + '_' + supName + '="' + supValue + '"';
|
|
535 |
//
|
|
536 |
// // Suppression de sections
|
|
537 |
// for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE) {
|
|
538 |
// query += ' & !' + sectionToIgnore;
|
|
539 |
// }
|
|
540 |
//
|
|
541 |
//
|
|
542 |
//
|
|
543 |
// queries.add(query);
|
|
544 |
// partNames.add(supValue);
|
|
545 |
//
|
|
546 |
// // TODO : Debug
|
|
547 |
// print supValue + " ";
|
|
548 |
// }
|
|
549 |
// }
|
|
550 |
// }
|
|
551 |
//
|
|
552 |
//
|
|
553 |
// // Ajout des niveaux inférieurs
|
|
554 |
// for(int i = 1; i < STRUCTURAL_UNITS.size(); i++) {
|
|
555 |
//
|
|
556 |
// for(int j = 0; j < queries.size(); j++) {
|
|
557 |
// queries.set(j, queries.get(j) + ' & _.' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i] + '=".*"');
|
|
558 |
//
|
|
559 |
// // Noms de partitions
|
|
560 |
// partNames.set(j, partNames.get(j) + '_' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i]);
|
|
561 |
// }
|
|
562 |
// }
|
|
563 |
//
|
|
564 |
//
|
|
565 |
// // Clôture des requêtes
|
|
566 |
// for(int j = 0; j < queries.size(); j++) {
|
|
567 |
//
|
|
568 |
// String queryEnd = ']';
|
|
569 |
//
|
|
570 |
// // Expand to target
|
|
571 |
// if(EXPAND_TARGET != null && EXPAND_TARGET != '') {
|
|
572 |
// queryEnd += ' expand to ' + EXPAND_TARGET;
|
|
573 |
// }
|
|
574 |
//
|
|
575 |
// queries.set(j, queries.get(j) + queryEnd);
|
|
576 |
// }
|
|
577 |
//
|
|
578 |
//
|
|
579 |
//
|
|
580 |
// //i++;
|
|
581 |
//// }
|
|
582 |
//// else {
|
|
583 |
////
|
|
584 |
//// }
|
|
585 |
//
|
|
586 |
////}
|
|
587 |
//}
|
|
588 |
|
|
589 |
|
|
590 |
|
|
591 |
// Fonctionnelle pour 1 niveau de hiérarchie
|
|
592 |
//// Récupération des unités structurelles et création de la liste des requêtes CQL
|
|
593 |
//int i = 0;
|
|
594 |
//String supName;
|
|
595 |
//ArrayList queries = new ArrayList();
|
|
596 |
//ArrayList partNames = new ArrayList();
|
|
597 |
//
|
|
598 |
//for (suName in STRUCTURAL_UNITS) {
|
|
599 |
//
|
|
600 |
//
|
|
601 |
// // Définition auto du nom de partition
|
|
602 |
// NAME += suName + '_';
|
|
603 |
//
|
|
604 |
//
|
|
605 |
//
|
|
606 |
// println "Structural Unit Name: $suName"
|
|
607 |
//
|
|
608 |
// // Récupération des valeurs des propriétés d'unités structurelles
|
|
609 |
// for (sup in discours.getStructuralUnitProperties(suName.asType(String))) {
|
|
610 |
//
|
|
611 |
// supName = STRUCTURAL_UNITS_PROPERTIES[i];
|
|
612 |
//
|
|
613 |
// if(supName != '' && sup.getName() == supName) {
|
|
614 |
//
|
|
615 |
// // Définition auto du nom de partition
|
|
616 |
// NAME += supName + ".";
|
|
617 |
//
|
|
618 |
// // TODO : Debug
|
|
619 |
// println "Structural Unit Property: " + supName + " ";
|
|
620 |
//
|
|
621 |
// for (supValue in sup.getOrderedValues()) {
|
|
622 |
//
|
|
623 |
// String query = '[_.' + suName + '_' + supName + '="' + supValue + '"';
|
|
624 |
//
|
|
625 |
// // Suppression de sections
|
|
626 |
// for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE) {
|
|
627 |
// query += ' & !' + sectionToIgnore;
|
|
628 |
// }
|
|
629 |
//
|
|
630 |
// query += ']';
|
|
631 |
//
|
|
632 |
// // Expand to target
|
|
633 |
// if(EXPAND_TARGET != null && EXPAND_TARGET != '') {
|
|
634 |
// query += ' expand to ' + EXPAND_TARGET;
|
|
635 |
// }
|
|
636 |
//
|
|
637 |
//
|
|
638 |
// queries.add(query);
|
|
639 |
// partNames.add(supValue);
|
|
640 |
//
|
|
641 |
// // TODO : Debug
|
|
642 |
// print supValue + " ";
|
|
643 |
// }
|
|
644 |
// }
|
|
645 |
// }
|
|
646 |
// i++;
|
|
647 |
//}
|
|
648 |
|
|
649 |
|
|
650 |
|
|
651 |
|
|
652 |
//// start
|
|
653 |
//if (queries.size() == partnames.size()) {
|
|
654 |
//// def discours = CorpusManager.getCorpusManager().getCorpus(CORPUS)
|
|
655 |
// def partition;
|
|
656 |
// if (SUBCORPUS == null) {
|
|
657 |
// println "partition build with $discours"
|
|
658 |
// partition = discours.createPartition(NAME, queries.as, PARTNAMES)
|
|
659 |
// } else {
|
|
660 |
// def subcorpus
|
|
661 |
// if (SUBCORPUSQUERY == null) {
|
|
662 |
// println "partition build with subcorpus by name $SUBCORPUS"
|
|
663 |
// subcorpus = discours.getSubcorpusByName(SUBCORPUS)
|
|
664 |
// if (subcorpus == null) {
|
|
665 |
// println "Error: SUBCORPUS NOT FOUND with name : $SUBCORPUS"
|
|
666 |
// }
|
|
667 |
// } else {
|
|
668 |
// println "partition build with subcorpus with query $SUBCORPUSQUERY"
|
|
669 |
// subcorpus = discours.createSubcorpus(new Query(SUBCORPUSQUERY), SUBCORPUS)
|
|
670 |
// }
|
|
671 |
// partition = subcorpus.createPartition(NAME, QUERIES, PARTNAMES)
|
|
672 |
// }
|
|
673 |
//
|
|
674 |
// def list = Arrays.asList(partition.getPartSizes())
|
|
675 |
// println "Partition created $NAME: "+list+" parts"
|