Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / CwbDecode.groovy @ 479

History | View | Annotate | Download (10.9 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
25
// $LastChangedRevision: 2386 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.scripts;
29

    
30
import java.util.Date;
31
import java.util.Locale;
32
import java.text.DateFormat;
33
import java.io.BufferedReader;
34
import java.io.IOException;
35
import java.io.InputStream;
36
import java.io.InputStreamReader;
37
import java.util.ArrayList;
38

    
39
// Decode CWB indexes
40
// TODO: Auto-generated Javadoc
41

    
42
/**
 * Thin wrapper around the CWB {@code cwb-decode} command-line tool.
 *
 * <p>Each command-line option is represented by an "is set" flag (plus a value
 * where the option takes one) that is switched on with a {@code setX} method
 * and off with the matching {@code unsetX} method. {@link #cwbdecode(String)}
 * and {@link #cwbdecodeexe(String)} turn the current option state into a
 * command line, run it, and copy the tool's output to standard output.
 */
class CwbDecode {

    /** Prefix prepended verbatim to the executable name (no separator is added). */
    String binpath = "";

    /** The version. */
    String version = "0.0.0";

    /** Short description of this wrapper. */
    String desc = "Decode CWB indexes";

    /** Debug flag; only stored by this class. */
    boolean debug = false;

    /** {@code -L}: Lisp output mode. */
    private Boolean isL = false;

    /** {@code -H}: concordance line ('horizontal') output mode. */
    private Boolean isH = false;

    /** {@code -C}: compact output mode (suitable for encode). */
    private Boolean isC = false;

    /** {@code -Cx}: XML-compatible compact output (for {encode -x ...}). */
    private Boolean isCx = false;

    /** {@code -X}: XML output mode. */
    private Boolean isX = false;

    /** {@code -n}: show corpus position ('numbers'). */
    private Boolean isn = false;

    /** Whether {@code -s} is passed. */
    private Boolean iss = false;

    /** Value of {@code -s}: first token to print (corpus position). */
    private int s;

    /** Whether {@code -e} is passed. */
    private Boolean ise = false;

    /** Value of {@code -e}: last token to print (corpus position). */
    private int e;

    /** Whether {@code -r} is passed. */
    private Boolean isr = false;

    /** Value of {@code -r}: registry directory. */
    private String r;

    /** {@code -p}: matchlist mode (input from stdin). */
    private Boolean isp = false;

    /** Whether {@code -f} is passed. */
    private Boolean isf = false;

    /** Value of {@code -f}: matchlist mode, input from this file. */
    private String f;

    /** {@code -h}: help page. */
    private Boolean ish = false;

    /** Whether {@code -P} options are passed. */
    private Boolean isP = false;

    /** Values of {@code -P}: p-attributes to print, one {@code -P} per entry. */
    private List<String> P;

    /** Whether {@code -S} options are passed. */
    private Boolean isS = false;

    /** Values of {@code -S}: s-attributes to print (possibly including annotations). */
    private List<String> S;

    /** Whether {@code -V} options are passed. */
    private Boolean isV = false;

    /** Values of {@code -V}: s-attribute annotations shown for each range in matchlist mode. */
    private List<String> V;

    /** Whether {@code -A} options are passed. */
    private Boolean isA = false;

    /** Values of {@code -A}: alignment attributes to print. */
    private List<String> A;

    /** {@code -ALL}: print all p-attributes and s-attributes. */
    private Boolean isALL = false;

    /** Whether {@code -c} is passed. */
    private Boolean isc = false;

    /** Value of {@code -c}: expand ranges to the full region of this attribute (matchlist mode). */
    private String c;

    /**
     * Creates a wrapper that launches the tool found under the given prefix.
     *
     * @param binpath prefix concatenated directly with the executable name;
     *                include a trailing path separator if it is a directory
     */
    public CwbDecode(String binpath) {
        this.binpath = binpath;
    }

    /**
     * Sets the debug flag.
     *
     * @param b the new value of the debug flag
     */
    public void debug(boolean b) {
        this.debug = b;
    }

    /** Enables {@code -L} (Lisp output mode). */
    public void setL() {
        this.isL = true;
    }

    /** Disables {@code -L}. */
    public void unsetL() {
        this.isL = false;
    }

    /** Enables {@code -H} (concordance line / 'horizontal' output mode). */
    public void setH() {
        this.isH = true;
    }

    /** Disables {@code -H}. */
    public void unsetH() {
        this.isH = false;
    }

    /** Enables {@code -C} (compact output mode, suitable for encode). */
    public void setC() {
        this.isC = true;
    }

    /** Disables {@code -C}. */
    public void unsetC() {
        this.isC = false;
    }

    /** Enables {@code -Cx} (XML-compatible compact output). */
    public void setCx() {
        this.isCx = true;
    }

    /** Disables {@code -Cx}. */
    public void unsetCx() {
        this.isCx = false;
    }

    /** Enables {@code -X} (XML output mode). */
    public void setX() {
        this.isX = true;
    }

    /** Disables {@code -X}. */
    public void unsetX() {
        this.isX = false;
    }

    /** Enables {@code -n} (show corpus position). */
    public void setn() {
        this.isn = true;
    }

    /** Disables {@code -n}. */
    public void unsetn() {
        this.isn = false;
    }

    /**
     * Enables {@code -s} with the given value.
     *
     * @param arg corpus position of the first token to print
     */
    public void sets(int arg) {
        this.iss = true;
        this.s = arg;
    }

    /** Disables {@code -s}; the stored value is kept but no longer passed. */
    public void unsets() {
        this.iss = false;
    }

    /**
     * Enables {@code -e} with the given value.
     *
     * @param arg corpus position of the last token to print
     */
    public void sete(int arg) {
        this.ise = true;
        this.e = arg;
    }

    /** Disables {@code -e}; the stored value is kept but no longer passed. */
    public void unsete() {
        this.ise = false;
    }

    /**
     * Enables {@code -r} with the given value.
     *
     * @param arg the registry directory
     */
    public void setr(String arg) {
        this.isr = true;
        this.r = arg;
    }

    /** Disables {@code -r}; the stored value is kept but no longer passed. */
    public void unsetr() {
        this.isr = false;
    }

    /** Enables {@code -p} (matchlist mode, input from stdin). */
    public void setp() {
        this.isp = true;
    }

    /** Disables {@code -p}. */
    public void unsetp() {
        this.isp = false;
    }

    /**
     * Enables {@code -f} with the given value.
     *
     * @param arg the matchlist input file
     */
    public void setf(String arg) {
        this.isf = true;
        this.f = arg;
    }

    /** Disables {@code -f}; the stored value is kept but no longer passed. */
    public void unsetf() {
        this.isf = false;
    }

    /** Enables {@code -h} (help page). */
    public void seth() {
        this.ish = true;
    }

    /** Disables {@code -h}. */
    public void unseth() {
        this.ish = false;
    }

    /**
     * Enables {@code -P}; one {@code -P <att>} pair is emitted per list entry.
     *
     * @param arg the p-attributes to print; {@code null} is treated as empty
     */
    public void setP(List<String> arg) {
        this.isP = true;
        this.P = arg;
    }

    /** Disables {@code -P}; the stored list is kept but no longer passed. */
    public void unsetP() {
        this.isP = false;
    }

    /**
     * Enables {@code -S}; one {@code -S <att>} pair is emitted per list entry.
     *
     * @param arg the s-attributes to print; {@code null} is treated as empty
     */
    public void setS(List<String> arg) {
        this.isS = true;
        this.S = arg;
    }

    /** Disables {@code -S}; the stored list is kept but no longer passed. */
    public void unsetS() {
        this.isS = false;
    }

    /**
     * Enables {@code -V}; one {@code -V <att>} pair is emitted per list entry.
     *
     * @param arg the s-attributes whose annotation is shown for each range in
     *            matchlist mode; {@code null} is treated as empty
     */
    public void setV(List<String> arg) {
        this.isV = true;
        this.V = arg;
    }

    /** Disables {@code -V}; the stored list is kept but no longer passed. */
    public void unsetV() {
        this.isV = false;
    }

    /**
     * Enables {@code -A}; one {@code -A <att>} pair is emitted per list entry.
     *
     * @param arg the alignment attributes to print; {@code null} is treated as empty
     */
    public void setA(List<String> arg) {
        this.isA = true;
        this.A = arg;
    }

    /** Disables {@code -A}; the stored list is kept but no longer passed. */
    public void unsetA() {
        this.isA = false;
    }

    /** Enables {@code -ALL} (print all p-attributes and s-attributes). */
    public void setALL() {
        this.isALL = true;
    }

    /** Disables {@code -ALL}. */
    public void unsetALL() {
        this.isALL = false;
    }

    /**
     * Enables {@code -c} with the given value.
     *
     * @param arg the attribute whose full region ranges are expanded to (matchlist mode)
     */
    public void setc(String arg) {
        this.isc = true;
        this.c = arg;
    }

    /** Disables {@code -c}; the stored value is kept but no longer passed. */
    public void unsetc() {
        this.isc = false;
    }

    /**
     * Runs {@code cwb-decode.exe} on the given corpus with the currently enabled options.
     *
     * <p>The tool's combined stdout/stderr is echoed line by line to
     * {@code System.out}. A non-zero exit code is reported on {@code System.err}
     * and the command line is then printed.
     *
     * @param corpus the corpus name
     * @throws IOException if the executable cannot be started or its output cannot be read
     */
    public void cwbdecodeexe(String corpus) throws IOException {
        runAndReport(buildCommand("cwb-decode.exe", corpus));
    }

    /**
     * Runs {@code cwb-decode} on the given corpus with the currently enabled options.
     *
     * <p>The tool's combined stdout/stderr is echoed line by line to
     * {@code System.out}. A non-zero exit code is reported on {@code System.err}
     * and the command line is then printed.
     *
     * @param corpus the corpus name
     * @throws IOException if the executable cannot be started or its output cannot be read
     */
    public void cwbdecode(String corpus) throws IOException {
        runAndReport(buildCommand("cwb-decode", corpus));
    }

    /**
     * The main method; only instantiates the wrapper.
     *
     * @param args the arguments (unused)
     */
    public static void main(String[] args) {
        CwbDecode tt = new CwbDecode("");
    }

    /**
     * Builds the command line from the current option state.
     *
     * <p>Argument order is significant and is kept as it has always been:
     * flags, then the corpus name, then {@code -h} and the attribute options.
     */
    private List<String> buildCommand(String executable, String corpus) {
        List<String> command = new ArrayList<String>();
        command.add(binpath + executable);
        if (isL) {
            command.add("-L");
        }
        if (isH) {
            command.add("-H");
        }
        if (isC) {
            command.add("-C");
        }
        if (isCx) {
            command.add("-Cx");
        }
        if (isX) {
            command.add("-X");
        }
        if (isn) {
            command.add("-n");
        }
        if (iss) {
            command.add("-s");
            command.add("" + s);
        }
        if (ise) {
            command.add("-e");
            command.add("" + e);
        }
        if (isr) {
            command.add("-r");
            command.add("" + r);
        }
        if (isp) {
            command.add("-p");
        }
        if (isf) {
            command.add("-f");
            command.add("" + f);
        }
        command.add("" + corpus);
        if (ish) {
            command.add("-h");
        }
        if (isP) {
            addRepeated(command, "-P", P);
        }
        if (isS) {
            addRepeated(command, "-S", S);
        }
        if (isV) {
            addRepeated(command, "-V", V);
        }
        if (isA) {
            addRepeated(command, "-A", A);
        }
        if (isALL) {
            command.add("-ALL");
        }
        if (isc) {
            command.add("-c");
            command.add("" + c);
        }
        return command;
    }

    /** Appends one {@code flag value} pair per entry; a null list adds nothing. */
    private static void addRepeated(List<String> command, String flag, List<String> values) {
        if (values == null) {
            return;
        }
        for (String value : values) {
            command.add(flag);
            command.add("" + value);
        }
    }

    /** Starts the command, echoes its output to stdout and reports a non-zero exit code. */
    private void runAndReport(List<String> command) throws IOException {
        ProcessBuilder builder = new ProcessBuilder(command);
        builder.redirectErrorStream(true);
        // A launch failure propagates as IOException instead of being printed
        // and then dereferencing a null Process.
        Process process = builder.start();

        BufferedReader output = new BufferedReader(new InputStreamReader(process.getInputStream()));
        try {
            String line;
            while ((line = output.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            output.close();
        }

        int exitCode;
        try {
            exitCode = process.waitFor();
        } catch (InterruptedException interrupted) {
            // Keep the interrupt visible to the caller; the exit code is unknown here.
            Thread.currentThread().interrupt();
            System.err.println("Interrupted while waiting for " + command.get(0) + " to exit");
            return;
        }

        if (exitCode != 0) {
            String when = DateFormat.getDateInstance(DateFormat.FULL, Locale.UK).format(new Date());
            System.err.println("Process exited abnormally with code " + exitCode + " at " + when);

            for (String part : command) {
                System.out.print("" + part + " ");
            }
            System.out.println();
        }
    }
}