Révision 321

tmp/org.txm.groovy.core/src/groovy/tests/advancedPartition.groovy (revision 321)
1
package tests
2

  
3
import org.txm.rcpapplication.views.*
4
import org.txm.searchengine.cqp.corpus.*
5

  
6

  
7
/*
8
import java.awt.Dimension
9
import java.util.UUID;
10

  
11
import javax.swing.JFrame;
12
import javax.swing.JOptionPane;
13

  
14
import org.txm.Toolbox
15
import org.txm.searchengine.cqp.MemCqiClient
16
import org.txm.searchengine.cqp.MemCqiServer
17
import org.txm.searchengine.cqp.corpus.*
18
import org.txm.functions.ReferencePattern
19
import org.txm.rcpapplication.views.CorporaView
20
import org.txm.searchengine.cqp.corpus.query.Query
21
*/
22

  
23

  
24
/**
25
 * Create partition with advanced CQL queries and autoname the parts.
26
 * Can create partitions by defining a multi-level structural units hierarchy or by defining several properties values for one structural unit.
27
 * Can define some structural units to remove from the resulting parts subcorpus.
28
 *
29
 */
30

  
31
// TODO : Log
println('******************************** Starting ********************************************')


// *************************** User parameters ************************************************************


// Test 1: partition excluding some sections

// The name of the corpus to partition
String CORPUS_NAME = 'LIVRETOPERA2'

// The name of the subcorpus to partition (null = partition the corpus itself)
String SUBCORPUS_NAME = null
//def SUBCORPUS_NAME = "TEST"										// The name of the subcorpus to partition if needed
// FIXME: subcorpus management is bugged, when trying to create a CA on a Partition created on a subcorpus, it doesn't work because of the getLexicon() method which throws a CqiCqpErrorErrorGeneral exception

// The partition is applied on these structural units...
PartitionQueriesGenerator.STRUCTURAL_UNITS = ['sp'] as String[]
// ...and on these properties of the structural units defined above (same order, same count)
PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['who'] as String[]

// These structural units will be removed from the partition.
// NOTE: when doing that you need to define an EXPAND_TARGET if you want
// subcorpus parts that support sequential positions queries
PartitionQueriesGenerator.STRUCTURAL_UNITS_TO_IGNORE = ['speaker'] as String[]

// Expand the results to this parent structural unit.
// If some structural units to ignore are defined and the expand target is higher
// in the hierarchy than them, the structural units to ignore WON'T be ignored
PartitionQueriesGenerator.EXPAND_TARGET = 'lg'
53

  
54
//PartitionQueriesGenerator.PARTITION_NAME = 'gugu';					// The partition name. If empty or not defined, the partition is automatically named after the structural units and properties
55

  
56

  
57

  
58

  
59
// Test 2 : partition sur arborescence (multi-niveau sur 2 niveaux)
60
//def CORPUS_NAME = "LIVRETOPERA10TEXTES"
61
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'div1'];
62
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['id', 'name'];
63

  
64

  
65
// Test 3 : partition sur arborescence (multi-niveau sur 3 niveaux)
66
//def CORPUS_NAME = "LIVRETOPERA10TEXTES"
67
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'div1', 'div2'];
68
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['id', 'name', 'name'];
69

  
70

  
71
// Test 4 : partition sur arborescence (multi-niveau sur 4 niveaux)
72
// Ajouter le 'n' permet par exemple ici de trier les parties par ordre de scène car sinon le tri est problématique, ex : "SCENE II" passe avant "SCENE PREMIERE"
73
//def CORPUS_NAME = "LIVRETOPERA2"
74
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'div1', 'div2', 'div2'];
75
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['id', 'name', 'n', 'name'];
76

  
77

  
78
// Test 5 : partitions croisées (sur plusieurs propriétés d'une même structure)
79
//def CORPUS_NAME = "DISCOURS"
80
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['text', 'text'];
81
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['loc', 'type'];
82

  
83

  
84

  
85
// Tests
86
//def CORPUS_NAME = "LIVRETOPERA10TEXTES"
87
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['div1', 'sp'];
88
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['name', 'who'];
89

  
90
// Tests
91
//def CORPUS_NAME = "LIVRETOPERA10TEXTES"
92
//PartitionQueriesGenerator.STRUCTURAL_UNITS = ['div1'];
93
//PartitionQueriesGenerator.STRUCTURAL_UNITS_PROPERTIES = ['n'];
94
//PartitionQueriesGenerator.PART_NAMES_PREFIX = 'act_';
95

  
96

  
97

  
98

  
99
// *************************** Debug parameters ************************************************************

// When DEBUG is not 0 the partition is not created:
// the script only prints the generated queries and part names in the console
PartitionQueriesGenerator.DEBUG = 0


// *************************** End of parameters ************************************************************


// Running
def partition = PartitionQueriesGenerator.createPartition(CORPUS_NAME, SUBCORPUS_NAME)

// Refreshing the RCP component, only when a partition was actually created
if (partition != null) {
	// The closure is coerced to a Runnable and handed to the 'monitor' object
	// (not declared in this script, it must be provided by the calling context)
	Runnable refreshCorporaView = {
		CorporaView.refresh()
		CorporaView.expand(partition.getParent())
	} as Runnable
	monitor.syncExec(refreshCorporaView)
}
128

  
129

  
130

  
131

  
132

  
133
/**
 * Creates the list of CQL queries and the list of part names of a partition from
 * the user-defined configuration stored in the static fields of this class:
 * structural units, structural unit properties, structural units to ignore and
 * expand target.
 *
 * The whole state is static: the class is meant to be configured and used from a
 * single script at a time.
 *
 * @author s
 */
public class PartitionQueriesGenerator	{

	/** If DEBUG != 0 the queries are printed but the partition is not created. */
	public static int DEBUG = 0;

	/** Name of the partition. If empty or null, the name is generated by {@link #finalizeQueries()}. */
	public static String PARTITION_NAME = '';

	/** Structural units on which the partition is built, ex: ['text', 'div1']. */
	public static String[] STRUCTURAL_UNITS = [];

	/** Properties of the structural units on which the partition is built (same order as STRUCTURAL_UNITS), ex: ['id', 'name']. */
	public static String[] STRUCTURAL_UNITS_PROPERTIES = [];

	/** Structural units to exclude from the parts, ex. in CQL: !speaker. */
	public static String[] STRUCTURAL_UNITS_TO_IGNORE = [];

	/** Prefix added in front of every part name. */
	public static String PART_NAMES_PREFIX = '';

	/**
	 * Expand target: extends the matches up to the given parent structure.
	 * NOTE: the expand conflicts with the structural units to ignore. If the target is
	 * higher in the hierarchy than the units to ignore, CWB puts them back in the
	 * results and they are therefore not ignored.
	 */
	public static String EXPAND_TARGET = null;

	/** Generated queries, one per part. Reset at each valid call of {@link #createPartition(String, String)}. */
	public static ArrayList<String> queries = new ArrayList<String>();

	/** Generated part names, parallel to {@link #queries}. */
	public static ArrayList<String> partNames = new ArrayList<String>();

	/** Last partition name generated automatically, used to detect that PARTITION_NAME was not set by the user. */
	private static String lastAutoName = null;


	/**
	 * Generates the queries and the part names, then creates the partition.
	 *
	 * @param corpusName the name of the corpus to partition
	 * @param subcorpusName the name of a subcorpus of that corpus to partition instead, or null
	 * @return the partition created by the corpus; null if the configuration is invalid,
	 * if the corpus or subcorpus lookup returned null, if DEBUG != 0 or if the numbers
	 * of queries and part names differ
	 */
	public static Partition createPartition(String corpusName, String subcorpusName)	{

		if(STRUCTURAL_UNITS.size() == 0 || STRUCTURAL_UNITS.size() != STRUCTURAL_UNITS_PROPERTIES.size())	{
			// TODO : Log
			println 'Structural units count or structural units properties count error.';
			return null;
		}

		// The lists are static: without this reset a second call would append its
		// queries to those of the previous call
		queries.clear();
		partNames.clear();

		// A name generated by a previous call must not be mistaken for a user-defined name
		if(PARTITION_NAME != null && PARTITION_NAME == lastAutoName)	{
			PARTITION_NAME = '';
		}

		// TODO : Log
		println '**************************************************************************************************************'
		println 'Creating the queries on corpus "' + corpusName + '"';

		Corpus corpus = CorpusManager.getCorpusManager().getCorpus(corpusName);
		if(corpus == null)	{
			println 'Error: corpus not found with name: ' + corpusName;
			return null;
		}

		// Subcorpora
		if(subcorpusName != null)	{
			corpus = corpus.getSubcorpusByName(subcorpusName);
			if(corpus == null)	{
				println 'Error: subcorpus not found with name: ' + subcorpusName;
				return null;
			}
		}

		// Recursing through the corpus and subcorpus
		process(corpus, 0, '', '');

		// Finalizing the queries
		finalizeQueries();

		// TODO : Debug
		// Displaying the partition name
		println '';
		println 'PARTITION_NAME: ' + PARTITION_NAME;

		// Displaying the queries
		println 'Queries (count = ' + queries.size() + '):';
		for(query in queries)	{
			println query;
		}

		// Displaying the part names
		println 'Partnames (count = ' + partNames.size() + '):';
		println partNames.join(' / ');

		// TODO : Log
		println 'Queries created.';

		// Debug mode or inconsistent lists: nothing is created
		if(DEBUG != 0 || queries.size() != partNames.size())	{
			return null;
		}

		// Creating the partition
		return corpus.createPartition(PARTITION_NAME, queries, partNames);
	}


	/**
	 * Recurses through the structural units and structural unit properties of the corpus
	 * and creates the queries and the part names.
	 *
	 * For each value of the property at the current level, a temporary subcorpus is
	 * created, the next level is processed on it, then the temporary subcorpus is deleted.
	 *
	 * @param corpus the corpus or subcorpus
	 * @param index the index for recursion (position in STRUCTURAL_UNITS)
	 * @param tmpQuery the temporary query for creating subcorpus part
	 * @param tmpPartName the temporary part name of the subcorpus part
	 * @throws IllegalArgumentException if the lookup of the structural unit or of its property returns null
	 */
	protected static void process(Corpus corpus, int index, String tmpQuery, String tmpPartName)	{

		// End of array: the accumulated query and part name are complete
		if(index >= STRUCTURAL_UNITS.size())	{
			queries.add(tmpQuery);
			partNames.add(PART_NAMES_PREFIX + tmpPartName);
			return;
		}

		StructuralUnit su = corpus.getStructuralUnit(STRUCTURAL_UNITS[index]);
		if(su == null)	{
			throw new IllegalArgumentException('Structural unit "' + STRUCTURAL_UNITS[index] + '" not found in corpus "' + corpus.getName() + '"');
		}
		StructuralUnitProperty sup = su.getProperty(STRUCTURAL_UNITS_PROPERTIES[index]);
		if(sup == null)	{
			throw new IllegalArgumentException('Property "' + STRUCTURAL_UNITS_PROPERTIES[index] + '" not found in structural unit "' + STRUCTURAL_UNITS[index] + '"');
		}

		// TODO : Log
		println ''
		if(index == 0)	{
			println 'Processing Structural Unit Property "' + sup.getFullName() + '" on mother corpus "' + corpus.getName() + '"';
		}
		else	{
			println 'Processing Structural Unit Property "' + sup.getFullName() + '" on subcorpus part "' + tmpPartName + '"';
		}
		println ''

		// Partition conditions and part name separators, only needed after the first level
		String and = tmpQuery ? ' & ' : '';
		String underscore = tmpQuery ? '_' : '';

		// Creating the queries parts for each structural units properties values
		// TODO : report the bug to Matthieu: passing the subcorpus again should not be needed since sup was
		// already created from the subcorpus? getOrderedValues() without argument and getValues() are bugged too
		for (supValue in sup.getOrderedValues(corpus)) {

			// TODO : Log
			println ''
			println 'Value "' + supValue + '"';
			println ''

			// Getting the subcorpus linked to the structural unit property value
			Subcorpus tmpSubcorpus = corpus.createSubcorpusWithQueryString(su, sup, supValue, "tmp" + UUID.randomUUID());

			try	{
				// NOTE(review): supValue is inserted as is in the CQL string; a value containing a
				// double quote or regular expression characters would presumably need escaping - TODO confirm
				process(tmpSubcorpus, index + 1, (tmpQuery + and + '_.' + sup.getFullName() + '="' + supValue + '"'), tmpPartName + underscore + supValue);
			}
			finally	{
				// Deleting the temporary subcorpus, even when a deeper level failed
				// TODO : bug: dropSubcorpus() does not delete the corpus, probably because the subcorpus
				// must not contain other subcorpora? delete() on the other hand works.
//				corpus.dropSubcorpus(tmpSubcorpus);
				tmpSubcorpus.delete();
			}
		}
	}


	/**
	 * Autonames the partition: stores in PARTITION_NAME the given prefix followed by
	 * one 'unit_property' item per structural unit and one 'NOT_unit' item per
	 * structural unit to ignore, the items being separated by dots.
	 *
	 * @param partitionName the beginning of the name (null is handled as an empty string)
	 */
	protected static void autoNamePartition(String partitionName)	{

		List<String> nameParts = new ArrayList<String>();

		// Structural units names and properties
		for(int i = 0; i < STRUCTURAL_UNITS.size(); i++)	{
			nameParts.add(STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i]);
		}

		// Structural units to ignore
		for(unitToIgnore in STRUCTURAL_UNITS_TO_IGNORE)	{
			nameParts.add('NOT_' + unitToIgnore);
		}

		// Joining avoids having to remove a trailing dot, which failed on an empty name
		PARTITION_NAME = (partitionName ?: '') + nameParts.join('.');
	}


	/**
	 * Finalizes the queries: autonames the partition if it has no name, then wraps
	 * each generated condition in '[' ... ']', appends the negated structural units
	 * to ignore and the 'expand to' clause.
	 */
	protected static void finalizeQueries()	{

		String expandTo = '';
		// Expanding to user defined target
		if(EXPAND_TARGET)	{
			expandTo = ' expand to ' + EXPAND_TARGET;
		}
		// Expanding to last child structural unit in user defined hierarchy
		else if(STRUCTURAL_UNITS_TO_IGNORE.size() == 0 && STRUCTURAL_UNITS.size() > 0)	{
			expandTo = ' expand to ' + STRUCTURAL_UNITS[STRUCTURAL_UNITS.size() - 1];
		}

		// Autonaming the partition (empty or null name)
		if(!PARTITION_NAME)	{
			autoNamePartition('');
			// Finalizing partition name
			PARTITION_NAME += expandTo.replace(' expand to', '.EXPAND TO').replace(' ', '_');
			lastAutoName = PARTITION_NAME;
		}

		// The end of the query is the same for all the queries: sections to remove, then expand
		String queryEnd = '';
		for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE)	{
			queryEnd += ' & !' + sectionToIgnore;
		}
		queryEnd += ']' + expandTo;

		// Finalizing queries
		for(int j = 0; j < queries.size(); j++)	{
			queries.set(j, '[' + queries.get(j) + queryEnd);
		}
	}

}
350

  
351

  
352

  
353

  
354

  
355

  
356
/*
357
// Test dialogue de confirmation avant création de la partition
358
JFrame frame = new JFrame('test');
359
//frame.setMinimumSize(new Dimension(400, 400));
360
//frame.setVisible(true);
361
//frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
362
int n = JOptionPane.showConfirmDialog(
363
	frame, "Would you like green eggs and ham?",
364
	"An Inane Question",
365
	JOptionPane.YES_NO_OPTION);
366
if (n == JOptionPane.YES_OPTION) {
367
frame.setTitle("Ewww!");
368
} else if (n == JOptionPane.NO_OPTION) {
369
frame.setTitle("Me neither!");
370
} else {
371
frame.setTitle("Come on -- tell me!");
372
}*/
373

  
374

  
375

  
376

  
377

  
378
//// Récupération des unités structurelles et création de la liste des requêtes CQL
379
//int i = 0;
380
//ArrayList<String> queries = new ArrayList<String>();
381
//ArrayList<String> partNames = new ArrayList<String>();
382
//
383
//
384
//int currentQueriesCount;
385
//if(STRUCTURAL_UNITS.size() > 0 && STRUCTURAL_UNITS.size() == STRUCTURAL_UNITS_PROPERTIES.size())	{
386
//
387
//	// Parcours des unités strcuturelles à traiter
388
//	for (suName in STRUCTURAL_UNITS) {
389
//
390
//		// Définition auto du nom de partition
391
//		PARTITION_NAME += suName + '_';
392
//
393
//
394
//		StructuralUnit su = corpus.getStructuralUnit(suName);
395
//
396
//
397
//		// TODO : Debug
398
//		println ''
399
//		println 'Structural Unit Name: ' + su.getName();
400
//
401
//
402
//		// Récupération des propriétés d'unités structurelles
403
//		for (StructuralUnitProperty sup in su.getProperties())	{
404
//
405
//
406
//			if(STRUCTURAL_UNITS_PROPERTIES[i] != '' && sup.getName() == STRUCTURAL_UNITS_PROPERTIES[i])	{
407
//
408
//				// Définition auto du nom de partition
409
//				PARTITION_NAME += STRUCTURAL_UNITS_PROPERTIES[i] + ".";
410
//
411
//				// TODO : Debug
412
//				println "Structural Unit Property: " + sup.getName() + " ";
413
//
414
//
415
//				// Récupération des valeurs des propriétés d'unités structurelles
416
//
417
//				//for (supValue in sup.getOrderedValues()) {
418
//				for (supValue in sup.getOrderedValues()) {
419
//
420
//					// Récupération du sous-corpus lié à la valeur de propriété de structure
421
//					Subcorpus tmpSubcorpus = corpus.createSubcorpusWithQueryString(su, sup, supValue, "tmp");
422
//
423
//					String query = suName + '_' + STRUCTURAL_UNITS_PROPERTIES[i] + '="' + supValue + '"';
424
//
425
//
426
//					queries.add(query);
427
//					partNames.add(supValue);
428
//
429
//					// TODO : Debug
430
//					print supValue + " ";
431
//
432
//					// Suppression du sous-corpus temporaire
433
//					corpus.dropSubcorpus(tmpSubcorpus);
434
//
435
//				}
436
//
437
//			}
438
//		}
439
//
440
//
441
////		// Ajout des niveaux inférieurs
442
////		for(int i = 1; i < STRUCTURAL_UNITS.size(); i++)	{
443
////
444
////			for(int j = 0; j < queries.size(); j++)	{
445
////				queries.set(j, queries.get(j) + ' & _.' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i] + '=".*"');
446
////
447
////				// Noms de partitions
448
////				partNames.set(j, partNames.get(j) + '_' + STRUCTURAL_UNITS[i] + '_'  + STRUCTURAL_UNITS_PROPERTIES[i]);
449
////			}
450
////		}
451
//
452
//
453
//
454
//
455
//
456
//
457
//		i++;
458
//	//	}
459
//	//	else	{
460
//	//
461
//	//	}
462
//
463
//	}
464
//
465
//
466
//
467
//
468
//
469
//	// Clôture des requêtes
470
//	for(int j = 0; j < queries.size(); j++)	{
471
//
472
//		String queryEnd = '';
473
//
474
//		// Suppression de sections
475
//		for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE)	{
476
//			queryEnd += ' & !' + sectionToIgnore;
477
//		}
478
//
479
//		queryEnd += ']';
480
//
481
//		// Expand to target
482
//		if(EXPAND_TARGET != null && EXPAND_TARGET != '')	{
483
//			queryEnd += ' expand to ' + EXPAND_TARGET;
484
//		}
485
//
486
//		queries.set(j, '[_.' +  queries.get(j) + queryEnd);
487
//	}
488
//}
489

  
490

  
491

  
492

  
493

  
494

  
495

  
496

  
497

  
498

  
499

  
500

  
501

  
502

  
503

  
504

  
505
////for (suName in STRUCTURAL_UNITS) {
506
//if(STRUCTURAL_UNITS.size() > 0 && STRUCTURAL_UNITS.size() == STRUCTURAL_UNITS_PROPERTIES.size())	{
507
//
508
//	suName = STRUCTURAL_UNITS[0]
509
//	//if(STRUCTURAL_UNITS.size() > 1)	{
510
//
511
//	// Définition auto du nom de partition
512
//	PARTITION_NAME += suName + '_';
513
//
514
//
515
//	// TODO : Debug
516
//	println ''
517
//	println "Structural Unit Name: $suName"
518
//
519
//	// Récupération des valeurs des propriétés d'unités structurelles
520
//	for (sup in discours.getStructuralUnitProperties(suName.asType(String)))	{
521
//
522
//		supName = STRUCTURAL_UNITS_PROPERTIES[0];
523
//
524
//		if(supName != '' && sup.getName() == supName)	{
525
//
526
//			// Définition auto du nom de partition
527
//			PARTITION_NAME += supName + ".";
528
//
529
//			// TODO : Debug
530
//			println "Structural Unit Property: " + supName + " ";
531
//
532
//			for (supValue in sup.getOrderedValues()) {
533
//
534
//				String query = '[_.' + suName + '_' + supName + '="' + supValue + '"';
535
//
536
//				// Suppression de sections
537
//				for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE)	{
538
//					query += ' & !' + sectionToIgnore;
539
//				}
540
//
541
//
542
//
543
//				queries.add(query);
544
//				partNames.add(supValue);
545
//
546
//				// TODO : Debug
547
//				print supValue + " ";
548
//			}
549
//		}
550
//	}
551
//
552
//
553
//	// Ajout des niveaux inférieurs
554
//	for(int i = 1; i < STRUCTURAL_UNITS.size(); i++)	{
555
//
556
//		for(int j = 0; j < queries.size(); j++)	{
557
//			queries.set(j, queries.get(j) + ' & _.' + STRUCTURAL_UNITS[i] + '_' + STRUCTURAL_UNITS_PROPERTIES[i] + '=".*"');
558
//
559
//			// Noms de partitions
560
//			partNames.set(j, partNames.get(j) + '_' + STRUCTURAL_UNITS[i] + '_'  + STRUCTURAL_UNITS_PROPERTIES[i]);
561
//		}
562
//	}
563
//
564
//
565
//	// Clôture des requêtes
566
//	for(int j = 0; j < queries.size(); j++)	{
567
//
568
//		String queryEnd = ']';
569
//
570
//		// Expand to target
571
//		if(EXPAND_TARGET != null && EXPAND_TARGET != '')	{
572
//			queryEnd += ' expand to ' + EXPAND_TARGET;
573
//		}
574
//
575
//		queries.set(j, queries.get(j) + queryEnd);
576
//	}
577
//
578
//
579
//
580
//	//i++;
581
////	}
582
////	else	{
583
////
584
////	}
585
//
586
////}
587
//}
588

  
589

  
590

  
591
// Fonctionnelle pour 1 niveau de hiérarchie
592
//// Récupération des unités structurelles et création de la liste des requêtes CQL
593
//int i = 0;
594
//String supName;
595
//ArrayList queries = new ArrayList();
596
//ArrayList partNames = new ArrayList();
597
//
598
//for (suName in STRUCTURAL_UNITS) {
599
//
600
//
601
//	// Définition auto du nom de partition
602
//	NAME += suName + '_';
603
//
604
//
605
//
606
//	println "Structural Unit Name: $suName"
607
//
608
//	// Récupération des valeurs des propriétés d'unités structurelles
609
//	for (sup in discours.getStructuralUnitProperties(suName.asType(String)))	{
610
//
611
//		supName = STRUCTURAL_UNITS_PROPERTIES[i];
612
//
613
//		if(supName != '' && sup.getName() == supName)	{
614
//
615
//			// Définition auto du nom de partition
616
//			NAME += supName + ".";
617
//
618
//			// TODO : Debug
619
//			println "Structural Unit Property: " + supName + " ";
620
//
621
//			for (supValue in sup.getOrderedValues()) {
622
//
623
//				String query = '[_.' + suName + '_' + supName + '="' + supValue + '"';
624
//
625
//				// Suppression de sections
626
//				for(sectionToIgnore in STRUCTURAL_UNITS_TO_IGNORE)	{
627
//					query += ' & !' + sectionToIgnore;
628
//				}
629
//
630
//				 query += ']';
631
//
632
//				// Expand to target
633
//				if(EXPAND_TARGET != null && EXPAND_TARGET != '')	{
634
//					query += ' expand to ' + EXPAND_TARGET;
635
//				}
636
//
637
//
638
//				queries.add(query);
639
//				partNames.add(supValue);
640
//
641
//				// TODO : Debug
642
//				print supValue + " ";
643
//			}
644
//		}
645
//	}
646
//	i++;
647
//}
648

  
649

  
650

  
651

  
652
//// start
653
//if (queries.size() == partnames.size()) {
654
////	def discours = CorpusManager.getCorpusManager().getCorpus(CORPUS)
655
//	def partition;
656
//	if (SUBCORPUS == null) {
657
//		println "partition build with $discours"
658
//		partition = discours.createPartition(NAME, queries.as, PARTNAMES)
659
//	} else {
660
//		def subcorpus
661
//		if (SUBCORPUSQUERY == null) {
662
//			println "partition build with subcorpus by name $SUBCORPUS"
663
//			subcorpus = discours.getSubcorpusByName(SUBCORPUS)
664
//			if (subcorpus == null) {
665
//				println "Error: SUBCORPUS NOT FOUND with name : $SUBCORPUS"
666
//			}
667
//		} else {
668
//			println "partition build with subcorpus with query $SUBCORPUSQUERY"
669
//			subcorpus = discours.createSubcorpus(new Query(SUBCORPUSQUERY), SUBCORPUS)
670
//		}
671
//		partition = subcorpus.createPartition(NAME, QUERIES, PARTNAMES)
672
//	}
673
//
674
//	def list = Arrays.asList(partition.getPartSizes())
675
//	println "Partition created $NAME: "+list+" parts"
676
//	// println "Total size: "+list.sum()+" - is equal to (sub)corpus size : "+(list.sum() == partition.getCorpus().getSize())
677
//    println "Total size: "+list.sum()+" - (sub)corpus size : "+(partition.getCorpus().getSize())
678
//
679
//	monitor.syncExec(new Runnable() {
680
//	public void run() {
681
//		CorporaView.refresh();
682
//		CorporaView.expand(partition.getParent());
683
//	}
684
//});
685
//} else {
686
//	println "QUERIES.size() != PARTNAMES.size() = "+QUERIES.size()+" != "+PARTNAMES.size()
687
//}
688

  
689

  
690

  
691

  
692
// parameters
693
//def CORPUS_NAME = "LIVRETOPERA10TEXTES"
694
////def SUBCORPUS = "NOMDUSOUSCORPUS"
695
//def SUBCORPUS = null // si partition sur corpus
696
////def SUBCORPUSQUERY = '"je" expand to s'
697
//def SUBCORPUSQUERY = null // si sous-corpus par nom
698

  
699

  
700

  
701

  
702

  
703

  
704
//def TEXT = "01_ACHILLE ET POLYXENE"
705
// def TEXT = "02_ZEPHIRE ET FLORE"
706
/*def QUERIES = [
707
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="1" & _.sp_who=".*"]+</lg>',
708
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="2" & _.sp_who=".*"]+</lg>',
709
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="3" & _.sp_who=".*"]+</lg>',
710
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="4" & _.sp_who=".*"]+</lg>',
711
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="5" & _.sp_who=".*"]+</lg>',
712
]
713
def PARTNAMES = [
714
"acte_1",
715
"acte_2",
716
"acte_3",
717
"acte_4",
718
"acte_5",
719
]*/
tmp/org.txm.groovy.core/src/groovy/tests/tests_cql.groovy (revision 321)
1
package tests
2
import java.util.UUID;
3

  
4
import org.txm.Toolbox
5
import org.txm.searchengine.cqp.MemCqiClient
6
import org.txm.searchengine.cqp.MemCqiServer
7
import org.txm.searchengine.cqp.corpus.*
8
import org.txm.functions.ReferencePattern
9
import org.txm.rcpapplication.views.*
10
import org.txm.searchengine.cqp.corpus.query.Query
11

  
12
// Debug
println('******************************** Starting ********************************************')


// Tests of a sequence of queries sent directly to the server and manual creation of the subcorpus

// parameters
String CORPUS = 'LIVRETOPERA10TEXTES'
//def SUBCORPUS = "NOMDUSOUSCORPUS"
// def SUBCORPUS = null // when partitioning the corpus
//def SUBCORPUSQUERY = '"je" expand to s'
//def SUBCORPUSQUERY = null // when the subcorpus is found by name


// The corpus object is only used by the commented-out experiments of this script
def corpusManager = CorpusManager.getCorpusManager()
def discours = corpusManager.getCorpus(CORPUS)

// Name given to the query result on the CQP side: 'S' followed by a random UUID
String subcorpusCqpId = 'S' + UUID.randomUUID()
28

  
29
try {
	MemCqiServer server = (MemCqiServer) Toolbox.getCqiServer();

	// Prints the label followed by the value returned by the getter, or the exception the getter raised
	def report = { String label, Closure getter ->
		try {
			System.out.println(label + getter());
		}
		catch(Exception error) {
			System.out.println("Exception : " + error);
		}
	}

//	server.query('show corpora;');
//	server.query("info " + CORPUS + ";");

	// Sends the corpus name alone as a query
	String corpusQuery = CORPUS + ";";
	server.query(corpusQuery);

	//client.query(SUBCORPUSQUERY);

	// Deleting result variables
//	server.query("discard A;");
//	server.query("discard B;");
//	server.query("discard C;");

//	server.query('C = /region[sp,a]::a.sp_who="IPHIS";');


// Single line test: does not work in server mode
//	server.query('A = /region[sp]; B = /region[speaker]; C = difference B A;');


//	server.query('A = /region[sp];');
//	server.query('B = /region[speaker];');
//	server.query('C = difference A B;');

	// Display options tests
	server.query('show cd;');
	// Displaying the XML tags in the results lists
	server.query('show +div1 +div2 +sp +lg +l +speaker;');
	//server.query('set ShowTagAttributes on;');

	// Tests
//	server.query('A = /region[sp,a]::a.sp_who="THALIE";');
//	server.query('B = /region[speaker];');
//	server.query(subcorpusCqpId + ' = diff A B;');


	//server.query(subcorpusCqpId + ' = B;');



//	server.query(subcorpusCqpId + ' = <sp>[(_.) != "speaker"]+</sp>;');
	//server.query(subcorpusCqpId + ' = <div1>[]*<sp>[]+</sp>[]*</div1>;');

	//server.query(subcorpusCqpId + ' = <div1>[!speaker]*</div1>;');

// server.query(subcorpusCqpId + ' = <lg>[_.div1_n=".*" & _.sp_who="THALIE"]+</lg>;');
//	server.query(subcorpusCqpId + ' = /region[i];');


	// Query tests on the created subcorpus
	//server.query('set MatchingStrategy shortest;'); // standard, shortest, longest, traditional
	//server.query(subcorpusCqpId + ' = [div1 & _.sp_who="THALIE" & !speaker];');
	//server.query(subcorpusCqpId + ' = [_.div1_name=".*" & !speaker];');
	//server.query(subcorpusCqpId + ' = [_.div1_name=".*" & !speaker] expand to lg;');
	String subcorpusDefinition = subcorpusCqpId + ' = [div1 & !speaker] expand to lg;';
	server.query(subcorpusDefinition);


	//server.query('group ' + subcorpusCqpId + ' match word;');

	String subcorpusSelection = subcorpusCqpId + ';';
	server.query(subcorpusSelection);


	//server.query(subcorpusCqpId + ' = [] !;');
	//server.query(subcorpusCqpId + ';');
	//server.query(subcorpusCqpId + ' = [] !;');
//	server.query(subcorpusCqpId + ' = [] expand to div1 & !speaker;');



	//server.query(subcorpusCqpId + ' = [div1 & !speaker];');
	//server.query(subcorpusCqpId + ' = [div1 & !speaker] expand to lg;');
	//server.query(subcorpusCqpId + ' = [div1 & !speaker] expand to lg;');

	//server.query(subcorpusCqpId + ' = [div1 & !speaker];');

	//server.query(subcorpusCqpId + ' = [div1 &_.sp_who="THALIE" & !speaker];');

	//server.query(subcorpusCqpId + ' = [div1 &_.sp_who="THALIE" & !speaker];');
	//server.query(subcorpusCqpId + ' = "fin" [];');
	//server.query(subcorpusCqpId + ' = [div1 & _.sp_who="THALIE" & !speaker];');

	// Displaying the subcorpus
	//server.query("cat " + subcorpusCqpId + " 0 10;")
	String subcorpusDisplay = "cat " + subcorpusCqpId + ";";
	server.query(subcorpusDisplay)

	//server.cqpQuery(discours.getCqpId(), 'subcorpusid', SUBCORPUSQUERY);



	// Displaying the errors; a failure of the first getter does not prevent calling the second one
	report("client.getLastCqiError() : ") { server.getLastCqiError() }
	report("client.getLastCQPError() : ") { server.getLastCQPError() }



	// Manual creation of the subcorpus
//	Subcorpus subcorpus = new Subcorpus(subcorpusCqpId, "subcorpus" + subcorpusCqpId.toString().substring(0, 5), discours, new Query()); // FIXME : is it a problem to pass an empty Query() here?
//	discours.subcorpora.add(subcorpus);
//	subcorpus.registerToParent();

	// Manual creation of a partition
//	subcorpus.createPartition(NAME, QUERIES, PARTNAMES)


}
catch(Exception failure) {
	// Any failure of the sequence above is only reported on the standard output
	System.out.println("Exception : " + failure);
}
143

  
144

  
145

  
146
// Refreshing the user interface.
// The closure is coerced to a Runnable and handed to the 'monitor' object
// (not declared in this script, it must be provided by the calling context)
Runnable refreshCorporaView = {
	CorporaView.refresh()
	//CorporaView.expand(partition.getParent());
} as Runnable
monitor.syncExec(refreshCorporaView)
153

  
154

  
155

  
156

  
157

  
158

  
159
//// parameters
160
//def CORPUS = "LIVRETOPERA10TEXTES"
161
////def SUBCORPUS = "NOMDUSOUSCORPUS"
162
//def SUBCORPUS = null // si partition sur corpus
163
////def SUBCORPUSQUERY = '"je" expand to s'
164
//def SUBCORPUSQUERY = null // si sous-corpus par nom
165
//
166
//
167
//
168
//// Liste des unités strcuturelles
169
////def HIERARCHY = ['text_id', 'div1_n', 'sp_who', 'lg'];
170
//def STRUCTURAL_UNITS = ['text', 'div1', 'sp'];
171
//def STRUCTURAL_UNITS_PROPERTIES = ['id', 'n', 'who'];
172
////def TARGET = 'lg';
173
//
174
//String NAME = '';
175

  
176

  
177

  
178

  
179
//def TEXT = "01_ACHILLE ET POLYXENE"
180
// def TEXT = "02_ZEPHIRE ET FLORE"
181
/*def QUERIES = [
182
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="1" & _.sp_who=".*"]+</lg>',
183
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="2" & _.sp_who=".*"]+</lg>',
184
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="3" & _.sp_who=".*"]+</lg>',
185
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="4" & _.sp_who=".*"]+</lg>',
186
'<lg>[_.text_id="' + TEXT + '" & _.div1_n="5" & _.sp_who=".*"]+</lg>',
187
]
188
def PARTNAMES = [
189
"acte_1",
190
"acte_2",
191
"acte_3",
192
"acte_4",
193
"acte_5",
194
]*/
195

  
196

  
197
//def discours = CorpusManager.getCorpusManager().getCorpus(CORPUS)
198
//
199
//
200
//// Récupération des unités structurelles
201
//def i = 0;
202
//def supName;
203
//def QUERIES;
204
//def query;
205
//
206
//for (suName in STRUCTURAL_UNITS) {
207
//
208
//
209
//	// Définition auto du nom de partition
210
//	NAME += suName + '_';
211
//
212
//	//query = '';
213
//	
214
//	
215
//	println "Structural Unit Name: $suName"
216
//	
217
//	// Récupération des valeurs des propriétés d'unités structurelles
218
//	for (sup in discours.getStructuralUnitProperties(suName.asType(String)))	{
219
//		
220
//		supName = STRUCTURAL_UNITS_PROPERTIES[i];
221
//		
222
//		if(supName != '' && sup.getName() == supName)	{
223
//			
224
//			// Définition auto du nom de partition
225
//			NAME += supName + ".";
226
//			
227
//			println "Structural Unit Property: $supName";
228
//			
229
//			for (supValue in sup.getOrderedValues()) {
230
//			
231
//				//query += '<' + TARGET + '>' + supName;
232
//				
233
//				
234
//				print "$supValue "
235
//			}
236
//		}
237
//	}
238
//	i++;
239
//}
240
//
241
//// Suppression du dernier underscore ou point
242
//NAME = NAME.substring(0, NAME.length() - 1);
243
//// Debug
244
//println "NAME: " + NAME;
245

  
246

  
247

  
248

  
249

  
250

  
251

  
252

  
253
// start
254
/*if (QUERIES.size() == PARTNAMES.size()) {
255

  
256
	def partition;
257

  
258
	
259
	// Test unités structurelles
260
	for (t in discours.getStructuralUnits()) {
261
		println "Structural Unit: $t"
262
		
263
		// Test propriétés d'unités structurelles
264
		for (g in discours.getStructuralUnitProperties(t.asType(String))) {
265
			
266
			println "\tStructural Unit Property: $g"
267
			
268
			// Test valeurs des propriétés d'unités structurelles
269
			for (h in g.getOrderedValues()) {
270
				
271
				print "$h, "
272
				
273
			}
274
			
275
		}
276
		
277
	}
278
	
279
		
280
	// Test propriétés
281
	for (t in discours.getProperties()) {
282
		println "Property: $t"
283
	}
284
	
285
	
286
	if (SUBCORPUS == null) {
287
		println "partition build with $discours"
288
		partition = discours.createPartition(NAME, QUERIES, PARTNAMES)
289
	} else {
290
		def subcorpus
291
		if (SUBCORPUSQUERY == null) {
292
			println "partition build with subcorpus by name $SUBCORPUS"
293
			subcorpus = discours.getSubcorpusByName(SUBCORPUS)
294
			if (subcorpus == null) {
295
				println "Error: SUBCORPUS NOT FOUND with name : $SUBCORPUS"
296
			}
297
		} else {
298
			println "partition build with subcorpus with query $SUBCORPUSQUERY"
299
			subcorpus = discours.createSubcorpus(new Query(SUBCORPUSQUERY), SUBCORPUS)
300
		}
301
		partition = subcorpus.createPartition(NAME, QUERIES, PARTNAMES)
302
	}
303
	
304
	def list = Arrays.asList(partition.getPartSizes())
305
	println "Partition created $NAME: "+list+" parts"
306
	// println "Total size: "+list.sum()+" - is equal to (sub)corpus size : "+(list.sum() == partition.getCorpus().getSize())
307
    println "Total size: "+list.sum()+" - (sub)corpus size : "+(partition.getCorpus().getSize())
308
	
309
	monitor.syncExec(new Runnable() {
310
	public void run() {
311
		CorporaView.refresh();
312
		CorporaView.expand(partition.getParent());
313
	}
314
});
315
} else {
316
	println "QUERIES.size() != PARTNAMES.size() = "+QUERIES.size()+" != "+PARTNAMES.size()
317
}*/
318

  
tmp/org.txm.groovy.core/src/groovy/tests/tests_groovy.groovy (revision 321)
1
// Scratch test script: prints whatever `Progression5` resolves to when the script runs.
// NOTE(review): `Progression5` is not defined in this file — presumably a class or a
// binding variable supplied by the TXM Groovy environment; confirm before relying on it.
print(Progression5)
tmp/org.txm.groovy.core/src/groovy/org/txm/svn/package.html (revision 321)
1
<html>
2
<body>
3
<p>Contains Groovy scripts used to manipulate SVN repositories</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/tigersearch/InjectAnnotations.groovy (revision 321)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (Tue, 24 Jan 2017) $
25
// $LastChangedRevision: 3400 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.scripts.tigersearch;
29

  
30
import java.text.DateFormat;
31
import java.util.Date;
32
import java.util.ArrayList;
33
import javax.xml.stream.*;
34
import java.net.URL;
35
import org.txm.importer.filters.*;
36

  
37
/**
 * Copies an XML document while injecting annotations into it.
 *
 * The source document is read with a StAX parser and re-serialized to an
 * output file. Every <code>t</code> element receives an extra <code>pos</code>
 * attribute whose value is the second tab-separated column of the next line of
 * the annotation file that does not start with <code>&lt;</code>.
 *
 * An instance injects annotations into ONE file and is single-use:
 * {@link #process(File)} closes the parser, the input stream and the
 * annotation reader when it finishes.
 *
 * @author mdecorde
 */
public class InjectAnnotations {

	/** The URL of the XML document to read. */
	private def url;

	/** The stream opened on {@link #url}. */
	private def inputData;

	/** The StAX factory used to create {@link #parser}. */
	private def factory;

	/** The StAX parser reading the source document. */
	private XMLStreamReader parser;

	/** The line reader over the annotation file. */
	private BufferedReader reader;

	/** The writer of the result document (created by {@link #createOutput(File)}). */
	private def output;

	/** Local names of the elements that are written as empty elements ("milestones"). */
	ArrayList<String> solotags;

	/** The distinct annotation values injected so far. */
	HashSet<String> lespos = new HashSet<String>();

	/**
	 * Instantiates a new inject annotations.
	 *
	 * Failures to open or parse the inputs are reported on standard output and
	 * leave the instance unusable; {@link #process(File)} then returns false.
	 *
	 * @param url the URL of the XML document to annotate
	 * @param annotations the annotation file (tab-separated, value in the 2nd column)
	 * @param solotags the local names of the elements to write as empty elements;
	 *        null is treated as an empty list
	 */
	public InjectAnnotations(URL url, File annotations,
			ArrayList<String> solotags) {
		this.url = url;
		this.solotags = (solotags != null) ? solotags : new ArrayList<String>();
		try {
			inputData = url.openStream();
			factory = XMLInputFactory.newInstance();
			// Coalesce adjacent text so that a text node is delivered (and trimmed)
			// as a whole: without it a parser may split the text at each entity
			// reference and the per-event trim() would eat the surrounding spaces.
			factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
			parser = factory.createXMLStreamReader(inputData);

			// NOTE(review): FileReader decodes with the platform default charset while
			// the result is written in UTF-8 — confirm the encoding of the annotation file.
			reader = new BufferedReader(new FileReader(annotations));
		} catch (XMLStreamException ex) {
			System.out.println(ex);
		} catch (IOException ex) {
			System.out.println("IOException while parsing " + url + ": " + ex);
		}
	}

	/**
	 * Creates the output.
	 *
	 * @param outfile the file to write the result document to (UTF-8)
	 * @return true, if successful
	 */
	private boolean createOutput(File outfile) {
		try {
			output = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(outfile), "UTF-8"));
			return true;
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
			return false;
		}
	}

	/**
	 * Gets the next annotation: skips the lines starting with "&lt;" and
	 * returns the second tab-separated column of the first other line. The
	 * value is also recorded in {@link #lespos}.
	 *
	 * @return the next annotation
	 * @throws IOException if the annotation file is exhausted or the line has
	 *         fewer than two columns
	 */
	private String getNextAnnotation() throws IOException {
		String line = reader.readLine();
		while (line != null && line.startsWith("<"))
			line = reader.readLine();
		if (line == null)
			throw new IOException("no annotation left: the annotation file has fewer lines than the document has 't' elements");

		String[] columns = line.split("\t");
		if (columns.length < 2)
			throw new IOException("malformed annotation line (expected at least 2 tab-separated columns): " + line);

		lespos.add(columns[1]);
		return columns[1];
	}

	/**
	 * Process: writes the annotated copy of the source document.
	 *
	 * @param outfile the file to write the result document to
	 * @return true, if the whole document was written; false if the inputs
	 *         could not be opened, the output could not be created or an
	 *         error occurred while copying
	 */
	public boolean process(File outfile) {
		if (parser == null || reader == null) {
			System.out.println("Cannot process " + url + ": the inputs were not opened");
			return false;
		}
		if (!createOutput(outfile))
			return false;

		boolean success = false;
		String lastopenlocalname = "";
		try {
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
				if (event == XMLStreamConstants.START_ELEMENT) {
					lastopenlocalname = parser.getLocalName();
					writeStartElement();
				} else if (event == XMLStreamConstants.END_ELEMENT) {
					writeEndElement(lastopenlocalname);
				} else if (event == XMLStreamConstants.CHARACTERS) {
					output.write(escapeXml(parser.getText().trim(), false));
				}
			}
			// Closed here (not only in finally) so that a failing flush is reported.
			output.close();
			success = true;
		} catch (XMLStreamException ex) {
			System.out.println(ex);
		} catch (IOException ex) {
			System.out.println("IOException while parsing " + inputData + ": " + ex.getMessage());
		} finally {
			closeQuietly(output);
			closeQuietly(parser);
			closeQuietly(reader);
			closeQuietly(inputData);
		}
		return success;
	}

	/**
	 * Writes the start tag of the element the parser is positioned on, with
	 * its attributes and, for a <code>t</code> element, the injected
	 * <code>pos</code> attribute. Elements listed in {@link #solotags} are
	 * written as empty elements.
	 */
	private void writeStartElement() throws IOException {
		String localname = parser.getLocalName();
		output.write("\n<" + qualifiedName());

		// NOTE(review): attributes are written with their local name only, as before;
		// a prefixed attribute such as xml:id therefore loses its prefix.
		for (int i = 0; i < parser.getAttributeCount(); i++)
			output.write(" " + parser.getAttributeLocalName(i)
					+ "=\"" + escapeXml(parser.getAttributeValue(i), true)
					+ "\"");

		// get annotation
		if (localname.equals("t"))
			output.write(" pos=\"" + escapeXml(getNextAnnotation(), true) + "\"");

		output.write(solotags.contains(localname) ? "/>" : ">");
	}

	/**
	 * Writes the end tag of the element the parser is positioned on, unless
	 * the element was already written as an empty element. The end tag stays
	 * on the line of its start tag when no other element was opened in between.
	 *
	 * @param lastopenlocalname the local name of the most recently opened element
	 */
	private void writeEndElement(String lastopenlocalname) throws IOException {
		String localname = parser.getLocalName();
		if (solotags.contains(localname))
			return;

		String separator = lastopenlocalname.equals(localname) ? "" : "\n";
		output.write(separator + "</" + qualifiedName() + ">");
	}

	/**
	 * @return the name of the current element, preceded by "prefix:" when the
	 *         parser reports a non-empty prefix
	 */
	private String qualifiedName() {
		String prefix = parser.getPrefix();
		if (prefix == null || prefix.isEmpty())
			return parser.getLocalName();
		return prefix + ":" + parser.getLocalName();
	}

	/**
	 * Escapes the XML markup characters of a value read from the parser (which
	 * delivers them unescaped) so that the written document stays well-formed.
	 *
	 * @param text the raw value, may be null
	 * @param inAttribute true to also escape double quotes
	 * @return the escaped value, "" for null
	 */
	private static String escapeXml(String text, boolean inAttribute) {
		if (text == null)
			return "";
		String escaped = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
		return inAttribute ? escaped.replace("\"", "&quot;") : escaped;
	}

	/**
	 * Closes a resource, ignoring a null resource and any failure: only used
	 * for clean-up, after the outcome of the processing is already decided.
	 *
	 * @param resource any object with a close() method, may be null
	 */
	private static void closeQuietly(def resource) {
		if (resource == null)
			return;
		try {
			resource.close();
		} catch (Exception ignored) {
			// clean-up only: nothing useful can be done here
		}
	}

	/**
	 * Writes the declaration of the "pos" feature: one <code>value</code>
	 * element per distinct annotation value injected by {@link #process(File)}.
	 *
	 * @param f the file to write the declaration to (UTF-8)
	 */
	public void getFeature(File f)
	{
		Writer writer = new OutputStreamWriter(new FileOutputStream(f) , "UTF-8");
		try {
			writer.write("<feature name=\"pos\" domain=\"T\">\n");
			for (String pos : lespos)
				writer.write("<value name=\"" + escapeXml(pos, true) + "\"></value>\n");
			writer.write("</feature>\n");
		} finally {
			writer.close();
		}
	}

	/**
	 * The main method: annotates ~/xml/beroul/beroul.xml with result.tt.
	 *
	 * @param args the arguments (unused)
	 */
	public static void main(String[] args) {

		// "~" is a shell shortcut that java.io.File does not expand:
		// resolve the directory against the user's home explicitly.
		File rootDir = new File(System.getProperty("user.home"), "xml/beroul");
		new File(rootDir, "identity").mkdir();

		// the tags that must stay milestones
		ArrayList<String> milestones = new ArrayList<String>();
		milestones.add("tagUsage");
		milestones.add("pb");
		milestones.add("lb");
		milestones.add("catRef");

		File srcfile = new File(rootDir, "beroul.xml");
		File annotationsfiles = new File(rootDir, "result.tt");
		File resultfile = new File(rootDir, "beroul-result.xml");
		println("identity file : " + srcfile + " to : " + resultfile);

		def builder = new InjectAnnotations(srcfile.toURI().toURL(), annotationsfiles,
				milestones);
		builder.process(resultfile);
	}

}
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/tigersearch/package.html (revision 321)
1
<html>
2
<body>
3
<p>Contains Groovy scripts used to manage TIGERSearch source files</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/tigersearch/InsertAnnotationsBeroul.groovy (revision 321)
1
/**
2
 * Main.
3
 *
4
 * @param args the args
5
 */
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
// 
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
// 
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
// 
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
// 
27
// 
28
// 
29
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun., 06 mai 2013) $
30
// $LastChangedRevision: 2386 $
31
// $LastChangedBy: mdecorde $ 
32
//
33
package org.txm.scripts.tigersearch;
34

  
35
import org.txm.utils.treetagger.TreeTagger;
36
import org.txm.scripts.teitxm.*;
37

  
38
/**
 * Script that inserts TreeTagger annotations into the Beroul XML file.
 *
 * Works in ~/xml/fullberoul and runs three steps:
 *  1. beroul.xml is converted to a TreeTagger source file (beroul.tt) by BuildTTFile;
 *  2. TreeTagger is run on beroul.tt with the "fro.par" model and writes result.tt;
 *  3. InjectAnnotations copies beroul.xml to beroul-result.xml, adding the
 *     annotations read from result.tt, then the feature declaration is written
 *     to feature.xml.
 */

String home = System.getProperty("user.home")
File rootDir = new File(home, "xml/fullberoul/")

// the tags that must stay milestones
ArrayList<String> milestones = new ArrayList<String>(["tagUsage", "pb", "lb", "catRef"])

// Step 1 - transform xml tiger >> TTsrc
File srcfile = new File(rootDir, "beroul.xml")
File resultfile = new File(rootDir, "beroul.tt")
println("xml>>TT from : "+srcfile+" to : "+resultfile )

// toURI().toURL() instead of the deprecated File.toURL(), which does not
// escape characters that are illegal in URLs (e.g. spaces in the path).
def builder = new BuildTTFile(srcfile.toURI().toURL(), milestones)
builder.process(resultfile, "t")

// Step 2 - tag TT
String infile = resultfile.getPath()
String modelfile = home+"/treetagger/models/fro.par"
String outfile = rootDir.getAbsolutePath()+"/result.tt"

println("proj "+modelfile+ " on " +resultfile +" >> "+outfile)

TreeTagger tt = new TreeTagger(home+"/treetagger/bin/")
tt.settoken()
tt.setquiet()
tt.setsgml()
tt.seteostag("<s>")
tt.treetagger(modelfile, infile, outfile)

// Step 3 - inject new TTattributes
File annotationsfiles = new File(rootDir, "result.tt")
File lastresultfile = new File(rootDir, "beroul-result.xml")
// The message names the file actually written by this step (it used to
// print the TreeTagger source file of step 1).
println("insert TT annotations : "+srcfile+" to : "+lastresultfile )

def injector = new InjectAnnotations(srcfile.toURI().toURL(), annotationsfiles, milestones)
injector.process(lastresultfile)

injector.getFeature(new File(rootDir, "feature.xml"))
86
/*
87
//TAG with TnT
88
//need to replace <s> by nothing and </s> by \n
89
String encoding = "UTF-8"
90
for(String text : texts)
91
{
92
	//patch src files
93
	File f = new File(textsDir,text+".t");
94
	File temp = new File("tempFileCVScleaner")
95
	println("patch texts files "+f+": rmv <s> and replace </s>");
96
	Reader reader = new InputStreamReader(new FileInputStream(f),encoding);
97
	Writer writer = new FileWriter(temp);
98
	reader.eachLine 
99
			{
100
				if(it.trim().startsWith("</s"))
101
					writer.write("\n")
102
				else if(it.trim().startsWith("<s"))
103
					writer.write("")
104
				else
105
					writer.write(it+"\n")
106
			}
107
	reader.close();
108
	writer.close();
109
	if (!(f.delete() && temp.renameTo(f))) println "Warning can't rename file "+temp+" to "+f
110
}*/
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/tigersearch/SubCorpusToSimpleCorpus.groovy (revision 321)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$ 
27
//
28
package org.txm.scripts.tigersearch;
29

  
30
import org.txm.sw.ReplaceXmlDomNode;
31

  
32
import java.io.File;
33
import java.util.ArrayList;
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff