Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / xml / compiler.groovy @ 1000

History | View | Annotate | Download (19.2 kB)

1

    
2

    
3
// Copyright © 2010-2013 ENS de Lyon.
4
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
5
// Lyon 2, University of Franche-Comté, University of Nice
6
// Sophia Antipolis, University of Paris 3.
7
// 
8
// The TXM platform is free software: you can redistribute it
9
// and/or modify it under the terms of the GNU General Public
10
// License as published by the Free Software Foundation,
11
// either version 2 of the License, or (at your option) any
12
// later version.
13
// 
14
// The TXM platform is distributed in the hope that it will be
15
// useful, but WITHOUT ANY WARRANTY; without even the implied
16
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17
// PURPOSE. See the GNU General Public License for more
18
// details.
19
// 
20
// You should have received a copy of the GNU General
21
// Public License along with the TXM platform. If not, see
22
// http://www.gnu.org/licenses.
23
//
24
//
25
//
26
// $LastChangedDate: 2016-05-26 17:42:36 +0200 (jeu. 26 mai 2016) $
27
// $LastChangedRevision: 3219 $
28
// $LastChangedBy: mdecorde $
29
//
30

    
31

    
32
package org.txm.scripts.importer.xml;
33

    
34
import java.util.ArrayList
35
import java.util.Collections
36
import org.txm.importer.cwb.BuildCwbEncodeArgs
37
import org.txm.importer.cwb.CwbEncode
38
import org.txm.importer.cwb.CwbMakeAll
39
import org.txm.scripts.importer.*
40
import org.txm.scripts.*
41
import org.txm.importer.scripts.xmltxm.*
42
import org.txm.utils.treetagger.TreeTagger
43

    
44
import javax.xml.stream.*
45

    
46
import java.net.URL
47
import java.io.File
48
import java.util.HashMap
49
import java.util.List
50
import java.util.HashMap
51
import java.util.HashSet
52
import org.txm.metadatas.*
53
import org.txm.utils.io.FileCopy
54

    
55
/**
56
 * The "compiler" Class of the XML/w import module.
57
 */
58
class compiler {
59
        
60
        /** When true, run() prints the collected structures and passes the flag to the CWB tools through setDebug(). */
61
        private boolean debug= false;
62

    
63
        /** The input data. */
64
        private def inputData;
65

    
66
        /** The factory. */
67
        private def factory;
68

    
69
        /** The parser. */
70
        private XMLStreamReader parser;
71

    
72
        /** The dir. */
73
        private def dir;
74

    
75
        /** The output. */
76
        private def output;
77

    
78
        /** The url. */
79
        private def url;
80

    
81
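        /** The distinct "type" values of the ana elements (first character removed) found by getAnaTypes(), and the values read for the current word. */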
82
        private static anatypes = []
83
        private static anavalues = [:]
84

    
85
        /** The anahash. */
86
        private HashMap<String, String> anahash = new HashMap<String, String>() ;
87

    
88
        private static SAttributesListener sattrsListener;
89
        private static HashMap<String, ArrayList<String>> structs;
90
        private static HashMap<String, Integer> structsProf;
91

    
92
        /** The identifier of the text, written in the "id" attribute of the text structure. */
93
        String text="";
94

    
95
        /** The value written in the "base" attribute of the text structure. */
96
        String base="";
97

    
98
        /** The value written in the "project" attribute of the text structure. */
99
        String project="";
100

    
101
        /** The text attributes. */
102
        String[] textAttributes = null;
103

    
104
        /** The language code written in the "lang" attribute of the txmcorpus element ("fr" by default). */
105
        private String lang ="fr";
106

    
107
        public static sortMetadata = null;
108
        public static normalizeMetadata = false;
109

    
110
        /**
         * Creates a compiler without opening any XML source: the body is
         * empty, so every field keeps its declared initial value.
         */
        public compiler() {
        }
115

    
116
        /**
         * Stores the import options in the static settings of the class.
         *
         * @param sortmetadata the value assigned to the static sortMetadata
         *        field; run() sorts the files in their natural order when it
         *        is null
         * @param normalizemetadata the value assigned to the static
         *        normalizeMetadata field; when true, attribute values and ana
         *        values are lower-cased before being written
         */
        public void setOptions(String sortmetadata, boolean normalizemetadata) {
                normalizeMetadata = normalizemetadata
                sortMetadata = sortmetadata
        }
121

    
122
        /**
         * Instantiates a new compiler reading one XML file.
         *
         * Opens a stream on the given URL, creates a StAX reader on it and
         * attaches that reader to the shared SAttributesListener: the
         * listener is created on first use and restarted on the new reader
         * afterwards.
         *
         * Failures to open or parse the source are only reported on the
         * console; the instance is still returned, with a parser that may be
         * null.
         *
         * @param url the URL of the XML file to read
         * @param text the text identifier, written in the "id" attribute of the text structure
         * @param base the value written in the "base" attribute of the text structure
         * @param project the value written in the "project" attribute of the text structure
         */
        public compiler(URL url, String text, String base, String project)
        {
                this.text = text
                this.base = base;
                this.project = project;
                this.url = url;
                // textAttributes is left at its declared default: this
                // constructor receives no value for it.
                try {
                        inputData = url.openStream();

                        factory = XMLInputFactory.newInstance();
                        parser = factory.createXMLStreamReader(inputData);

                        if (sattrsListener == null) {
                                sattrsListener = new SAttributesListener(parser);
                        } else {
                                sattrsListener.start(parser)
                        }
                } catch (XMLStreamException ex) {
                        System.out.println(ex);
                } catch (IOException ex) {
                        // report which source failed and why
                        System.err.println("IOException while parsing " + url + ": " + ex);
                }
        }
154

    
155
        /**
         * Sets the language of the corpus.
         *
         * @param lang the language code; run() writes it in the "lang"
         *        attribute of the txmcorpus element
         * @return no return type is declared on this method; presumably the
         *         assigned value is handed back - confirm before relying on it
         */
        public setLang(String lang)
        {
                this.lang = lang
        }
165

    
166
        /** The annotation success. */
167
        boolean annotationSuccess = false;
168

    
169
        /**
         * Stores the given flag in the annotationSuccess field.
         *
         * @param val the new value of annotationSuccess
         */
        public void setAnnotationSuccess(boolean val) {
                annotationSuccess = val
        }
178

    
179
        /**
         * Opens the shared "output" writer on the given file, encoded in UTF-8.
         *
         * The file is opened in append mode when it already exists, so
         * successive calls on the same file add to its content instead of
         * replacing it.
         *
         * @param f the file to write to
         * @return true if the writer was opened; false if an exception
         *         occurred (the exception is printed on the error stream)
         */
        private boolean createOutput(File f) {
                boolean opened = false
                try {
                        boolean append = f.exists()
                        def rawStream = new FileOutputStream(f, append)
                        def bufferedStream = new BufferedOutputStream(rawStream)
                        output = new OutputStreamWriter(bufferedStream, "UTF-8")
                        opened = true
                } catch (Exception e) {
                        System.err.println(e)
                }
                return opened
        }
195

    
196
        /**
         * Advances the parser just past the end tag of the "teiHeader"
         * element, or to the end of the document if there is none.
         */
        private void GoToText()
        {
                int event = parser.next()
                while (event != XMLStreamConstants.END_DOCUMENT) {
                        boolean headerClosed = event == XMLStreamConstants.END_ELEMENT &&
                                        parser.getLocalName().equals("teiHeader")
                        if (headerClosed) {
                                return
                        }
                        event = parser.next()
                }
        }
207

    
208
        /**
         * Appends the CQP form of the XML document read by this compiler to
         * the given file.
         *
         * The document is read event by event:
         * <ul>
         * <li>a "text" start element opens a text structure carrying the id,
         * base and project of this compiler, followed by the attributes of
         * the element itself (except "id");</li>
         * <li>a "w" element produces one line: the form, the word id, then
         * one tab-separated column per entry of anatypes;</li>
         * <li>"form" and "ana" elements feed the current word line;</li>
         * <li>any other element is copied as a structure tag, with an "n"
         * counter attribute when it has no attribute of its own; such
         * elements are ignored once a "tei" element has been seen and until
         * the first "text" element starts.</li>
         * </ul>
         * Element names are compared in lower case.
         *
         * When an exception occurs the source XML file is passed to
         * FileCopy.copy with a target in a "compiler-error" directory next to
         * the CQP file. The writer, the parser and the input stream are
         * released whether the processing succeeds or not.
         *
         * @param cqpFile the CQP file to append to
         * @param textmetadata the metadata of the text; not used by this method
         * @return true if the whole document was processed; false if the
         *         output could not be opened or an exception occurred
         */
        public boolean transfomFileCqp(File cqpFile, HashMap<String, String> textmetadata)
        {
                if (!createOutput(cqpFile))
                        return false;

                String vAna = "";
                String vForm = "";
                String wordid = "";

                def ncounts = [:] // contains the n values per tags with no attribute

                boolean flagForm = false;
                boolean flagAna = false;

                String anatype = "";
                String anavalue = "";
                boolean foundtei = false;
                boolean foundtext = false;

                // becomes true only once everything was read, written and closed
                boolean completed = false;
                try {
                        String localname;
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
                        {
                                switch (event) {
                                        case XMLStreamConstants.START_ELEMENT:
                                                localname = parser.getLocalName().toLowerCase();
                                                if ("tei".equals(localname)) foundtei = true;
                                                switch (localname) {
                                                        case "text":
                                                                sattrsListener.startElement(localname);
                                                                foundtext = true;
                                                                output.write("<text id=\""+text+"\" base=\""+base+"\"" + " project=\""+project+"\"");
                                                                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                                                                        String attrname = parser.getAttributeLocalName(i);
                                                                        String attrvalue = parser.getAttributeValue(i)
                                                                        if (normalizeMetadata)
                                                                                attrvalue = attrvalue.toLowerCase();
                                                                        // the id attribute was already written from this.text
                                                                        if (attrname != "id")
                                                                                output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+"\"")
                                                                }
                                                                output.write(">\n");
                                                                break;

                                                        case "w":
                                                                for (int i = 0 ; i < parser.getAttributeCount(); i++) {
                                                                        if (parser.getAttributeLocalName(i).equals("id")) {
                                                                                wordid = parser.getAttributeValue(i);
                                                                        }
                                                                }
                                                                // start a fresh set of ana values for this word
                                                                anavalues = [:];
                                                                break;

                                                        case "form":
                                                                flagForm = true;
                                                                vForm = "";
                                                                vAna = "";
                                                                break;

                                                        case "ana":
                                                                flagAna = true;
                                                                anavalue = "";
                                                                for (int i = 0 ; i < parser.getAttributeCount(); i++)
                                                                        if ("type".equals(parser.getAttributeLocalName(i))) {
                                                                                // drops the first character (expected to be '#'),
                                                                                // exactly as getAnaTypes() does when it builds anatypes
                                                                                anatype = parser.getAttributeValue(i).substring(1);
                                                                                break;
                                                                        }
                                                                break;

                                                        default:
                                                                // ignore the elements found after <tei> and before <text>
                                                                if (foundtei && !foundtext) break;

                                                                sattrsListener.startElement(localname);
                                                                output.write("<"+localname);

                                                                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                                                                        String attrname = parser.getAttributeLocalName(i);
                                                                        String attrvalue = parser.getAttributeValue(i)
                                                                        if (normalizeMetadata)
                                                                                attrvalue = attrvalue.toLowerCase();
                                                                        output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+"\"")
                                                                }
                                                                if (parser.getAttributeCount() == 0) { // add the n attribute
                                                                        if (!ncounts.containsKey(localname)) ncounts.put(localname, 0);
                                                                        int ncount = ncounts.get(localname);
                                                                        ncounts.put(localname, ncount+1);
                                                                        output.write(" n=\""+ncount+"\"")
                                                                }
                                                                output.write(">\n");
                                                }
                                                break;

                                        case XMLStreamConstants.END_ELEMENT:
                                                localname = parser.getLocalName().toLowerCase();
                                                switch (localname) {
                                                        case "w":
                                                                // one column per known ana type, empty when the word has no value for it
                                                                for (String type : anatypes) {
                                                                        def v = anavalues.get(type);
                                                                        if (v != null) vAna +="\t"+v;
                                                                        else vAna +="\t";
                                                                }
                                                                vForm = vForm.replaceAll("\n", "").replaceAll("&", "&amp;").replaceAll("<", "&lt;");
                                                                output.write(vForm+"\t"+wordid+vAna+"\n");
                                                                vAna = "";
                                                                vForm = "";
                                                                break;

                                                        case "tei":
                                                                break;
                                                        case "form":
                                                                flagForm = false;
                                                                break;
                                                        case "ana":
                                                                anavalues.put(anatype, anavalue)
                                                                flagAna = false;
                                                                break;
                                                        default:
                                                                // ignore the elements found after <tei> and before <text>
                                                                if (foundtei && !foundtext) break;

                                                                sattrsListener.endElement(localname);
                                                                output.write("</"+localname+">\n");
                                                }
                                                break;

                                        case XMLStreamConstants.CHARACTERS:
                                                if (flagForm)
                                                        vForm += parser.getText().trim();
                                                if (flagAna) {
                                                        if (normalizeMetadata)
                                                                anavalue += parser.getText().trim().toLowerCase();
                                                        else
                                                                anavalue += parser.getText().trim();
                                                }
                                                break;
                                }
                        }
                        // closed here so that a failure to flush is reported as an error
                        output.close();
                        parser.close();
                        completed = true;
                } catch (Exception ex) {
                        System.out.println("Exception while parsing " + inputData+" of Text "+text+": "+ex);
                        File xmlFile = null
                        File errorDir = null
                        try {
                                xmlFile = new File(url.getFile())
                                errorDir = new File(cqpFile.getParentFile(), "compiler-error")
                                println "Warning: Moving $xmlFile to $errorDir"
                                errorDir.mkdir();
                                FileCopy.copy(xmlFile, new File(errorDir, xmlFile.getName()))
                        } catch(Exception eCopy) {
                                println "Error while moving "+url+" to "+errorDir
                        }
                } finally {
                        if (!completed) {
                                // the failure was already reported: release what is
                                // still open without raising a second error
                                try {
                                        output?.close()
                                } catch (Exception ignored) {
                                }
                                try {
                                        parser?.close()
                                } catch (Exception ignored) {
                                }
                        }
                        // closing the StAX reader does not close the stream it reads from
                        try {
                                inputData?.close()
                        } catch (Exception ignored) {
                        }
                }
                return completed;
        }
407

    
408
        /**
         * Collects the analysis types used in one XML file.
         *
         * Reads every "ana" start element, takes the value of its "type"
         * attribute without its first character, and appends each type that
         * is not yet known to the static anatypes list.
         *
         * This method replaces the inputData, factory and parser fields of
         * this instance. The parser and the stream are released even when
         * parsing fails; parsing errors are still propagated to the caller.
         *
         * @param xmlFile the XML file to scan
         */
        private void getAnaTypes(File xmlFile) {
                inputData = xmlFile.toURI().toURL().openStream();
                HashSet<String> types = new HashSet<String>();
                try {
                        factory = XMLInputFactory.newInstance();
                        parser = factory.createXMLStreamReader(inputData);
                        String ana = "ana"
                        try {
                                for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                        if (event != XMLStreamConstants.START_ELEMENT) continue; // only start elements matter
                                        if (!ana.equals(parser.getLocalName())) continue; // only ana elements matter

                                        for (int i = 0 ; i < parser.getAttributeCount(); i++) { // find @type
                                                if ("type".equals(parser.getAttributeLocalName(i))) {
                                                        // drops the first character (expected to be '#'),
                                                        // exactly as transfomFileCqp() does when it reads the values
                                                        types.add(parser.getAttributeValue(i).substring(1));
                                                        break;
                                                }
                                        }
                                }
                        } finally {
                                parser.close()
                        }
                } finally {
                        // closing the StAX reader does not close the stream it reads from;
                        // a failure to close a stream that was only read is not worth reporting
                        try {
                                inputData.close()
                        } catch (Exception ignored) {
                        }
                }

                for (String type : types) {
                        if (!anatypes.contains(type)) {
                                anatypes << type
                        }
                }
        }
432

    
433
        /**
434
         * Run.
435
         *
436
         * @param rootDirFile the root dir file
437
         * @param basename the basename
438
         * @param textAttributes the text attributes
439
         * @param srcfiles the srcfiles
440
         * @return true, if successful
441
         */
442
        public boolean run(File binDir, File txmDir, String corpusname, String[] textAttributes, def srcfiles, Metadatas metadatas)
443
        {
444
                sattrsListener = null; // reset SAttribute Listener for each new import
445
                String rootDir = binDir.getAbsolutePath();
446
                anatypes = [] // reset
447
                anavalues = [:] // reset
448
                
449
                if (!(CwbEncode.isExecutableAvailable() && CwbMakeAll.isExecutableAvailable())) {
450
                        println ("Error: CWB executables not well set.")
451
                        return false;
452
                }
453
                if (!binDir.exists()) {
454
                        println ("binary directory does not exists: "+binDir)
455
                        return false;
456
                }
457

    
458
                File cqpFile = new File(binDir, "cqp/"+corpusname+".cqp")
459
                new File(binDir, "cqp").deleteDir()
460
                new File(binDir, "cqp").mkdir()
461
                new File(binDir, "data").deleteDir()
462
                new File(binDir, "data").mkdir()
463
                new File(binDir, "registry").mkdir()
464

    
465
                String textid = ""
466
                int counttext = 0
467
                List<File> files = txmDir.listFiles()
468
                //1- Transform into CQP file
469
                def builder = null
470

    
471
                //start corpus
472
                if (createOutput(cqpFile)) {
473
                        output.write("<txmcorpus lang=\""+lang+"\">\n")
474
                        output.close()
475
                }
476

    
477
                // sort files
478
                if (sortMetadata == null) {
479
                        Collections.sort(files)
480
                } else {
481
                        HashMap<File, String> sortmetadatavalues = new HashMap<File, String>()
482
                        for (File f : files) {
483
                                String value = MetadataGetter.get(f,"text", sortMetadata)
484
                                sortmetadatavalues.put(f, value)
485
                        }
486
                        println "sort properties value: "+sortmetadatavalues
487
                        Collections.sort(files, new Comparator<File>() {
488
                                /**
489
                                 * Compare.
490
                                 *
491
                                 * @param o1 the o1
492
                                 * @param o2 the o2
493
                                 * @return the int
494
                                 */
495
                                                public int compare(Object o1, Object o2) {
496
                                                        String v1 = sortmetadatavalues.get((File)o1)
497
                                                        String v2 = sortmetadatavalues.get((File)o2)
498
                                                        if (v1 == null || v2 == null) return 0;
499
                                                        return v1.compareTo(v2)
500
                                                }
501
                                        });
502
                }
503

    
504
                // get all anatypes
505
                for (File f : files) {
506
                        getAnaTypes(f)
507
                }
508

    
509
                println("Compiling "+files.size()+" $files ")
510
                for (File f : files) {
511
                        print "."
512
                        HashMap<String, String> textmetadata;
513
                        if (metadatas != null)
514
                                textmetadata = metadatas.getTextMetadata(f)
515
                        else
516
                                textmetadata = [:]
517

    
518
                        counttext++;
519
                        if (!f.exists()) {
520
                                println("file "+f+ " does not exists")
521
                        } else {
522
                                String txtname = f.getName().substring(0,f.getName().length()-4)
523
                                builder = new compiler(f.toURI().toURL(), txtname, corpusname, "default")
524
                                builder.setLang(lang);
525
                                if (!builder.transfomFileCqp(cqpFile, textmetadata)) {
526
                                        println("Failed to compile "+f)
527
                                }
528
                        }
529
                }
530

    
531
                //end corpus
532
                if (createOutput(cqpFile)) {
533
                        output.write("</txmcorpus>\n")
534
                        output.close()
535
                }
536
                println ""
537
                //2- Import into CWB
538
                def outDir = rootDir
539

    
540
                CwbEncode cwbEn = new CwbEncode()
541
                cwbEn.setDebug(debug)
542
                CwbMakeAll cwbMa = new CwbMakeAll()
543
                cwbMa.setDebug(debug)
544

    
545
                List<String> pargs = []
546
                pargs.add("id")
547
                for (String ana : anatypes)
548
                        pargs.add(ana)
549

    
550
                String[] pAttrs = pargs
551

    
552
                structs = sattrsListener.getStructs()
553
                structsProf = sattrsListener.getProfs()
554

    
555
                if (debug) {
556
                        println structs
557
                        println structsProf
558
                }
559
                
560
                List<String> sargs = new ArrayList<String>()
561
                def tmpTextAttrs = []
562
                for (String name : structs.keySet()) {
563
                        if (name == "text") {
564
                                for (String value : structs.get(name)) // append the attributes
565
                                        tmpTextAttrs << value // added after
566
                                continue;
567
                        }
568
                        //if ( name == "q") continue; // added after
569
                        //if ( name == "foreign") continue; // added after
570
                        String concat = name+":"+structsProf.get(name); // append the depth
571
                        for (String attributeName : structs.get(name)) // append the attributes
572
                                concat += "+"+attributeName.toLowerCase();
573
                                
574
                        if (structs.get(name).size() == 0) {
575
                                concat += "+n";
576
                        } else {
577
                                if (!structs.get(name).contains("n"))
578
                                        concat += "+n"
579
                        }
580
                                
581
                        if ((name == "p" || name == "body" || name == "back" || name == "front")
582
                                 && !concat.contains("+n+") && !concat.endsWith("+n"))
583
                                concat += "+n"
584
                                
585
                        sargs.add(concat)
586
                }
587

    
588
                String textSAttributes = "text:0+id+base+project";
589
                for (String name : tmpTextAttrs) {
590
                        if (!("id".equals(name) || "base".equals(name) || "project".equals(name)))
591
                                textSAttributes += "+"+name.toLowerCase()
592
                }
593
                //                if (metadataXPath != null) {
594
                //                        for (String meta : metadataXPath.keySet()) // text property declarations from metadata.csv
595
                //                                textSAttributes+="+"+meta;
596
                //                }
597
                sargs.add(textSAttributes)
598
                sargs.add("txmcorpus:0+lang")
599

    
600
                sargs.sort()
601

    
602
                String[] sAttributes = sargs
603
                String[] pAttributes = pAttrs
604
                println "P-attributes: "+pAttributes
605
                println "S-attributes: "+sargs
606

    
607
                //if(!annotationSuccess)
608
                //pAttributes = ["id"];
609

    
610
                //println "PATTRIBUTES : "+pargs;
611
                /*
612
                 ArrayList<String> wordstag = ["w"];
613
                 println "Getting structural attributes..."
614
                 BuildCwbEncodeArgs argsgetter = new BuildCwbEncodeArgs();
615
                 HashMap<String, HashSet<String>> allStructures = new HashMap<String, HashSet<String>>();
616
                 HashMap<String, Integer> allStructuresInclusion = new HashMap<String, Integer>();
617
                 for (File srcfile: txmDir.listFiles()) {
618
                 if (!(!srcfile.getName().endsWith(".csv") && srcfile.canRead() && !srcfile.isHidden() && !srcfile.isDirectory() && ValidateXml.test(srcfile)))
619
                 continue;
620
                 print "."
621
                 argsgetter.process(srcfile, wordstag);
622
                 for (String sattr : argsgetter.getSAttributes()) {
623
                 int idx = sattr.indexOf(":");
624
                 if(idx < 0 )
625
                 continue;
626
                 String name = sattr.substring(0, idx);
627
                 if (!allStructures.containsKey(name)) {
628
                 allStructures.put(name, new HashSet<String>());
629
                 allStructuresInclusion.put(name, 0);
630
                 }
631
                 //println "sattr: "+name
632
                 String attrs = sattr.substring(idx+1);
633
                 String[] split = attrs.split("\\+");
634
                 if (split.length > 0) {
635
                 int start = 1;
636
                 try {// test if first attr is a number
637
                 int n = Integer.parseInt(split[0]);
638
                 if (n > allStructuresInclusion.get(name))
639
                 allStructuresInclusion.put(name, n);
640
                 } catch(Exception e) {start = 0;}
641
                 for (int i = start ; i < split.length ; i++)
642
                 allStructures.get(name).add(split[i]);
643
                 }
644
                 }
645
                 }
646
                 // add structures+properties found in sources
647
                 List<String> sargs = new ArrayList<String>();
648
                 for (String name : allStructuresInclusion.keySet()) {
649
                 String concat = name+":"+allStructuresInclusion.get(name);
650
                 for (String value : allStructures.get(name))
651
                 concat += "+"+value;
652
                 if (name.equals("text")) {
653
                 concat += "+base+project"
654
                 if (!concat.contains("id"))
655
                 concat += "+id";
656
                 }
657
                 sargs.add(concat);
658
                 }*/
659

    
660
                //                for (int i = 0 ; i < sargs.size() ; i++) {
661
                //                        if (sargs.get(i).startsWith("text:")) {
662
                //                                String str = sargs.get(i);
663
                //                                sargs.set(i, "text:"+str.substring(6));
664
                //                        }
665
                //                }
666

    
667
                //                String textSAttributes = "text:0+id+base+project";
668
                //                if (metadatas != null) {
669
                //                        for (String meta : metadatas.getHeadersList()) // text property declarations from metadata.csv
670
                //                                textSAttributes+="+"+meta;
671
                //                }
672
                //sargs.add(textSAttributes)
673
                //sargs.add("txmcorpus:0+lang")
674

    
675
                //                String[] sAttributes = sargs;
676
                //                System.out.println("\nCorpus structures: "+sAttributes);
677
                //                System.out.println("corpus word properties: "+pAttributes);
678

    
679
                try {
680
                        String regPath = outDir + "/registry/"+corpusname.toLowerCase();
681
                        cwbEn.run(
682
                                outDir + "/data/$corpusname", 
683
                                outDir + "/cqp/"+corpusname+".cqp", 
684
                                regPath, pAttributes, sAttributes);
685
                        if (!new File(regPath).exists()) {
686
                                println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
687
                                return false;
688
                        }
689
                        cwbMa.run(corpusname, outDir + "/registry");
690
                } catch (Exception ex) {System.out.println(ex); return false;}
691

    
692
                return true;
693
        }
694

    
695
        /**
         * Turns debug mode on by setting this compiler's {@code debug} flag to true.
         * This method only ever raises the flag; it never resets it.
         */
        public void setDebug() {
                this.debug = true
        }
702

    
703
        /**
         * Developer entry point: runs the compiler in debug mode on the sample
         * "geo" corpus stored under the current user's home directory.
         *
         * The source directory and the CWB binaries path are resolved against the
         * "user.home" system property, because java.io.File does not expand a
         * leading "~" (tilde expansion is performed by shells, not by the JVM).
         *
         * @param args the command-line arguments (not used)
         */
        public static void main(String[] args) {
                // Resolve the home directory explicitly instead of relying on "~".
                String home = System.getProperty("user.home")
                File dir = new File(home, "xml/geo")
                def c = new compiler()
                c.setDebug()
                // setCwbPath receives a path string, as in the original call.
                c.setCwbPath(new File(home, "TXM/cwb/bin").getPath())
                c.run(dir, "geo")
        }
716
}