Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / xml / compiler.groovy @ 187

History | View | Annotate | Download (19.6 kB)

1

    
2

    
3
// Copyright © 2010-2013 ENS de Lyon.
4
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
5
// Lyon 2, University of Franche-Comté, University of Nice
6
// Sophia Antipolis, University of Paris 3.
7
// 
8
// The TXM platform is free software: you can redistribute it
9
// and/or modify it under the terms of the GNU General Public
10
// License as published by the Free Software Foundation,
11
// either version 2 of the License, or (at your option) any
12
// later version.
13
// 
14
// The TXM platform is distributed in the hope that it will be
15
// useful, but WITHOUT ANY WARRANTY; without even the implied
16
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17
// PURPOSE. See the GNU General Public License for more
18
// details.
19
// 
20
// You should have received a copy of the GNU General
21
// Public License along with the TXM platform. If not, see
22
// http://www.gnu.org/licenses.
23
//
24
//
25
//
26
// $LastChangedDate: 2016-05-26 17:42:36 +0200 (Thu, 26 May 2016) $
27
// $LastChangedRevision: 3219 $
28
// $LastChangedBy: mdecorde $
29
//
30

    
31

    
32
package org.txm.importer.xml;
33

    
34
import java.util.ArrayList
35
import java.util.Collections
36
import org.txm.importer.cwb.BuildCwbEncodeArgs
37
import org.txm.importer.cwb.CwbEncode
38
import org.txm.importer.cwb.CwbMakeAll
39
import org.txm.importer.*
40
import org.txm.scripts.*
41
import org.txm.scripts.teitxm.*
42
import org.txm.utils.treetagger.TreeTagger
43

    
44
import javax.xml.stream.*
45

    
46
import java.net.URL
47
import java.io.File
48
import java.util.HashMap
49
import java.util.List
50
import java.util.HashMap
51
import java.util.HashSet
52
import org.txm.metadatas.*
53
import org.txm.utils.FileCopy
54

    
55
/**
56
 * The "compiler" Class of the XML/w import module.
57
 */
58
class compiler {
59
        
60
        /** Verbosity flag: run() forwards it to the CWB tools and uses it to gate the printing of the collected structures. */
61
        private boolean debug= false;
62

    
63
        /** Input stream opened on the source XML (from the constructor URL or from the file scanned by getAnaTypes). */
64
        private def inputData;
65

    
66
        /** The XMLInputFactory used to create the parser. */
67
        private def factory;
68

    
69
        /** The StAX reader created on inputData. */
70
        private XMLStreamReader parser;
71

    
72
        /** The dir. NOTE(review): not referenced in the visible part of this class - confirm it is still needed. */
73
        private def dir;
74

    
75
        /** The UTF-8 writer on the WTC file, (re)opened by createOutput(). */
76
        private def output;
77

    
78
        /** The URL of the source XML file given to the constructor. */
79
        private def url;
80

    
81
        /** The "ana" type names registered by getAnaTypes(); their order is the order of the annotation columns written for each word. Static: shared by all instances and reset by run(). */
82
        private static anatypes = []
83
        /** The annotation values (type -> value) of the word being read; replaced by an empty map at each w element. Static: shared by all instances. */
84
        private static anavalues = [:]
85

    
86
        /** The anahash. NOTE(review): not referenced in the visible part of this class - confirm it is still needed. */
87
        private HashMap<String, String> anahash = new HashMap<String, String>() ;
88

    
89
        /** The structure listener shared by all instances: created by the first file-bound compiler, restarted on the parser of each following one, and reset to null by run(). */
90
        private static SAttributesListener sattrsListener;
91
        /** Filled by run() from sattrsListener.getStructs(): the attribute names per structure name. */
92
        private static HashMap<String, ArrayList<String>> structs;
93
        /** Filled by run() from sattrsListener.getProfs(): the value appended after "name:" in each structural attribute declaration. */
94
        private static HashMap<String, Integer> structsProf;
95

    
96
        /** The text identifier, written as the id attribute of the text element. */
97
        String text="";
98

    
99
        /** The corpus name, written as the base attribute of the text element. */
100
        String base="";
101

    
102
        /** The project name, written as the project attribute of the text element. */
103
        String project="";
104

    
105
        /** The text attributes. NOTE(review): never given a value in the visible part of this class. */
106
        String[] textAttributes = null;
107

    
108
        /** The location of the CWB binaries; set by setCwbPath() or, when still null, read from the Toolbox parameters by run(). */
109
        String cwbLoc;
110

    
111
        /** The language code written in the lang attribute of the txmcorpus element. */
112
        private String lang ="fr";
113

    
114
        /** Name of the text metadata used by run() to sort the files; when null the files are sorted with their natural order. Static: shared by all instances. */
115
        public static sortMetadata = null;
116
        /** When true, the attribute values and the annotation values are lower-cased before being written. Static: shared by all instances. */
117
        public static normalizeMetadata = false;
112

    
113
        /**
         * Creates a compiler that is not bound to any source file: nothing is
         * opened and every field keeps its declared default value.
         */
        public compiler() {
        }
118

    
119
        /**
         * Stores the sorting and normalization options. Both values are kept in
         * static fields, so they are shared by every compiler instance.
         *
         * @param sortmetadata the name of the text metadata used to sort the files, null to sort the files themselves
         * @param normalizemetadata true to lower-case the attribute and annotation values
         */
        public void setOptions(String sortmetadata, boolean normalizemetadata) {
                normalizeMetadata = normalizemetadata
                sortMetadata = sortmetadata
        }
124

    
125
        /**
         * Instantiates a new compiler bound to one source XML file: opens the
         * URL, creates a StAX parser on it and attaches the shared structure
         * listener to that parser (the listener is created on first use).
         *
         * Failures are reported on the console and not rethrown; the parser is
         * then left unset.
         *
         * @param url the url of the source XML file
         * @param text the text identifier
         * @param base the corpus name
         * @param project the project name
         */
        public compiler(URL url, String text, String base, String project)
        {
                this.text = text
                this.base = base
                this.project = project
                this.url = url
                try {
                        inputData = url.openStream()

                        factory = XMLInputFactory.newInstance()
                        parser = factory.createXMLStreamReader(inputData)

                        // the listener is static: create it once, then re-attach it to each new parser
                        if (sattrsListener == null) {
                                sattrsListener = new SAttributesListener(parser)
                        } else {
                                sattrsListener.start(parser)
                        }
                } catch (XMLStreamException ex) {
                        System.out.println("XMLStreamException while parsing "+url+": "+ex)
                } catch (IOException ex) {
                        System.err.println("IOException while parsing "+url+": "+ex)
                }
        }
157

    
158
        /**
         * Sets the language code of the corpus.
         *
         * @param lang the language code
         * @return the assigned language code (the method declares no return
         * type, so the value of its last expression is returned)
         */
        public setLang(String lang) {
                this.lang = lang
        }
168

    
169
        /** Tells if the annotation step succeeded; the value is only stored here. */
        boolean annotationSuccess = false

        /**
         * Stores the result of the annotation step.
         *
         * @param val true if the annotation succeeded
         */
        public void setAnnotationSuccess(boolean val) {
                annotationSuccess = val
        }
181

    
182
        /**
         * Sets the location of the CWB binaries. A missing path is only
         * reported on the error stream; the value is stored in every case.
         *
         * @param path the path to the CWB binaries
         */
        public void setCwbPath(String path) {
                File cwbDir = new File(path)
                if (!cwbDir.exists()) {
                        System.err.println("CWB Path : "+path+" does not exists")
                }
                cwbLoc = path
        }
193

    
194
        /**
         * Opens the UTF-8 writer stored in the output field. The file is opened
         * in append mode when it already exists, otherwise it is created.
         *
         * @param f the file to write
         * @return true if the writer was opened, false if an error occurred
         * (the error is printed on the error stream)
         */
        private boolean createOutput(File f) {
                boolean opened
                try {
                        boolean append = f.exists()
                        def fileStream = new FileOutputStream(f, append)
                        def buffered = new BufferedOutputStream(fileStream)
                        output = new OutputStreamWriter(buffered, "UTF-8")
                        opened = true
                } catch (Exception e) {
                        System.err.println(e)
                        opened = false
                }
                return opened
        }
210

    
211
        /**
         * Moves the parser forward until the end tag of the teiHeader element
         * has been read, or until the end of the document when there is none.
         */
        private void GoToText() {
                int event = parser.next()
                while (event != XMLStreamConstants.END_DOCUMENT) {
                        boolean isEndTag = (event == XMLStreamConstants.END_ELEMENT)
                        if (isEndTag && parser.getLocalName().equals("teiHeader")) {
                                return
                        }
                        event = parser.next()
                }
        }
222

    
223
        /**
         * Appends the content of the source XML file of this compiler to the
         * WTC file.
         *
         * What is written:
         * - the text element, with the id, base and project values of this
         *   compiler followed by the attributes of the source element (except
         *   "id");
         * - one line per w element: the form, a tab, the word id, then one
         *   tab-prefixed value per registered annotation type (empty when the
         *   word has no annotation of that type);
         * - every other element as a start and an end tag with a lower-cased
         *   name; an element without attribute receives an "n" attribute
         *   numbering the occurrences of that element name in this file.
         * Elements found after the TEI element and before the text element are
         * not written.
         *
         * The writer, the parser and the source stream are always released when
         * the parsing loop ends, even if it fails. When an error occurs the
         * source file is copied in a "compiler-error" directory next to the WTC
         * file.
         *
         * @param wtcFile the WTC file to append to
         * @param textmetadata the metadata of the text; not used, the
         * attributes of the source text element are written instead
         * @return true if the file was entirely processed, false if the output
         * could not be opened or if an error occurred
         */
        public boolean transfomFileWtc(File wtcFile, HashMap<String, String> textmetadata)
        {
                if (!createOutput(wtcFile))
                        return false;

                String vAna = ""; // annotation columns of the current word, each one prefixed by a tab
                String vForm = ""; // characters of the current form element
                String wordid = ""; // id attribute of the current w element

                def ncounts = [:] // next n value per element name, for the elements written without attribute

                boolean flagForm = false; // true while reading the content of a form element
                boolean flagAna = false; // true while reading the content of an ana element

                String anatype = "";
                String anavalue = "";
                boolean foundtei = false;
                boolean foundtext = false;
                try {
                        String localname;
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
                        {
                                switch (event) {
                                        case XMLStreamConstants.START_ELEMENT:
                                                localname = parser.getLocalName().toLowerCase();
                                                if ("tei".equals(localname)) foundtei = true;
                                                switch (localname) {
                                                        case "text":
                                                                sattrsListener.startElement(localname);
                                                                foundtext = true;
                                                                output.write("<text id=\""+text+"\" base=\""+base+"\"" + " project=\""+project+"\"");
                                                                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                                                                        String attrname = parser.getAttributeLocalName(i);
                                                                        String attrvalue = parser.getAttributeValue(i)
                                                                        if (normalizeMetadata)
                                                                                attrvalue = attrvalue.toLowerCase();
                                                                        // the id attribute was already written with the text identifier
                                                                        if (attrname != "id")
                                                                                output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+"\"")
                                                                }
                                                                output.write(">\n");
                                                                break;

                                                        case "w":
                                                                for (int i = 0 ; i < parser.getAttributeCount(); i++) {
                                                                        if (parser.getAttributeLocalName(i).equals("id")) {
                                                                                wordid = parser.getAttributeValue(i);
                                                                        }
                                                                }
                                                                anavalues = [:]; // forget the annotations of the previous word
                                                                break;

                                                        case "form":
                                                                flagForm = true;
                                                                vForm = "";
                                                                vAna = "";
                                                                break;

                                                        case "ana":
                                                                flagAna = true;
                                                                anavalue = "";
                                                                for (int i = 0 ; i < parser.getAttributeCount(); i++)
                                                                        if ("type".equals(parser.getAttributeLocalName(i))) {
                                                                                anatype = parser.getAttributeValue(i).substring(1);//remove the #
                                                                                break;
                                                                        }
                                                                break;

                                                        default:
                                                                // skip what is between the TEI root and the text element
                                                                if (foundtei && !foundtext) break;

                                                                sattrsListener.startElement(localname);
                                                                output.write("<"+localname);

                                                                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                                                                        String attrname = parser.getAttributeLocalName(i);
                                                                        String attrvalue = parser.getAttributeValue(i)
                                                                        if (normalizeMetadata)
                                                                                attrvalue = attrvalue.toLowerCase();
                                                                        output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+"\"")
                                                                }
                                                                if (parser.getAttributeCount() == 0) { // add the n attribute
                                                                        if (!ncounts.containsKey(localname)) ncounts.put(localname, 0);
                                                                        int ncount = ncounts.get(localname);
                                                                        ncounts.put(localname, ncount+1);
                                                                        output.write(" n=\""+ncount+"\"")
                                                                }
                                                                output.write(">\n");
                                                }
                                                break;

                                        case XMLStreamConstants.END_ELEMENT:
                                                localname = parser.getLocalName().toLowerCase();
                                                switch (localname) {
                                                        case "w":
                                                                // one column per registered type, in the order of anatypes
                                                                for (String type : anatypes) {
                                                                        def v = anavalues.get(type);
                                                                        if (v != null) vAna +="\t"+v;
                                                                        else vAna +="\t";
                                                                }
                                                                vForm = vForm.replaceAll("\n", "").replaceAll("&", "&amp;").replaceAll("<", "&lt;");
                                                                output.write(vForm+"\t"+wordid+vAna+"\n");
                                                                vAna = "";
                                                                vForm = "";
                                                                break;

                                                        case "tei":
                                                                break;
                                                        case "form":
                                                                flagForm = false;
                                                                break;
                                                        case "ana":
                                                                anavalues.put(anatype, anavalue)
                                                                flagAna = false;
                                                                break;
                                                        default:
                                                                if (foundtei && !foundtext) break;

                                                                sattrsListener.endElement(localname);
                                                                output.write("</"+localname+">\n");
                                                }
                                                break;

                                        case XMLStreamConstants.CHARACTERS:
                                                if (flagForm)
                                                        vForm += parser.getText().trim();
                                                if (flagAna) {
                                                        if (normalizeMetadata)
                                                                anavalue += parser.getText().trim().toLowerCase();
                                                        else
                                                                anavalue += parser.getText().trim();
                                                }
                                                break;
                                }
                        }
                        // closed here so that a failure while flushing is reported as an error
                        output.close();
                        parser.close();
                } catch (Exception ex) {
                        System.out.println("Exception while parsing " + inputData+" of Text "+text+": "+ex);
                        File xmlFile = null
                        File errorDir = null
                        try {
                                xmlFile = new File(url.getFile())
                                errorDir = new File(wtcFile.getParentFile(), "compiler-error")
                                println "Warning: Moving $xmlFile to $errorDir"
                                errorDir.mkdir();
                                FileCopy.copy(xmlFile, new File(errorDir, xmlFile.getName()))
                        } catch(Exception eCopy) {
                                println "Error while moving "+url+" to "+errorDir
                        }
                        return false;
                } finally {
                        // Release everything, including after an error. Closing the
                        // parser does not close the stream it reads, so the stream
                        // is closed explicitly. Errors raised here are ignored: the
                        // result of the method is already decided.
                        [output, parser, inputData].each { resource ->
                                try {
                                        resource?.close()
                                } catch (Exception ignored) {
                                }
                        }
                }
                return true;
        }
422

    
423
        /**
         * Reads one XML file and registers in the shared anatypes list every
         * annotation type it declares: the value of the "type" attribute of each
         * ana element, without its first character (the "#").
         *
         * The inputData, factory and parser fields are overwritten by this
         * method. The parser and the stream are released before returning, even
         * if the parsing fails; in that case the exception is propagated and no
         * type is registered.
         *
         * @param xmlFile the XML file to read
         */
        private void getAnaTypes(File xmlFile) {
                HashSet<String> types = new HashSet<String>();
                inputData = xmlFile.toURI().toURL().openStream();
                try {
                        factory = XMLInputFactory.newInstance();
                        parser = factory.createXMLStreamReader(inputData);
                        try {
                                for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                        if (event != XMLStreamConstants.START_ELEMENT) continue; // only start tags matter
                                        if (!"ana".equals(parser.getLocalName())) continue; // only ana elements matter

                                        for (int i = 0 ; i < parser.getAttributeCount(); i++) { // find @type
                                                if ("type".equals(parser.getAttributeLocalName(i))) {
                                                        types.add(parser.getAttributeValue(i).substring(1)); //remove the #
                                                        break;
                                                }
                                        }
                                }
                        } finally {
                                parser.close()
                        }
                } finally {
                        // closing the parser does not close the stream it reads
                        try {
                                inputData.close()
                        } catch (IOException ignored) {
                                // the file was only read: nothing is lost if the close fails
                        }
                }

                for (String type : types) {
                        if (!anatypes.contains(type)) {
                                anatypes << type
                        }
                }
        }
447

    
448
        /**
449
         * Run.
450
         *
451
         * @param rootDirFile the root dir file
452
         * @param basename the basename
453
         * @param textAttributes the text attributes
454
         * @param srcfiles the srcfiles
455
         * @return true, if successful
456
         */
457
        public boolean run(File binDir, File txmDir, String corpusname, String[] textAttributes, def srcfiles, Metadatas metadatas)
458
        {
459
                sattrsListener = null; // reset SAttribute Listener for each new import
460
                String rootDir = binDir.getAbsolutePath();
461
                anatypes = [] // reset
462
                anavalues = [:] // reset
463
                if (cwbLoc == null)
464
                        cwbLoc = org.txm.Toolbox.getParam(org.txm.Toolbox.CQI_SERVER_PATH_TO_CQPLIB)+File.separator;
465

    
466
                if (!new File(cwbLoc).exists()) {
467
                        println ("CWB path error: "+cwbLoc)
468
                        return false;
469
                }
470
                if (!binDir.exists()) {
471
                        println ("binary directory does not exists: "+binDir)
472
                        return false;
473
                }
474

    
475
                File wtcFile = new File(binDir, "wtc/"+corpusname+".wtc")
476
                new File(binDir, "wtc").deleteDir()
477
                new File(binDir, "wtc").mkdir()
478
                new File(binDir, "data").deleteDir()
479
                new File(binDir, "data").mkdir()
480
                new File(binDir, "registry").mkdir()
481

    
482
                String textid = ""
483
                int counttext = 0
484
                List<File> files = txmDir.listFiles()
485
                //1- Transform into WTC file
486
                def builder = null
487

    
488
                //start corpus
489
                if (createOutput(wtcFile)) {
490
                        output.write("<txmcorpus lang=\""+lang+"\">\n")
491
                        output.close()
492
                }
493

    
494
                // sort files
495
                if (sortMetadata == null) {
496
                        Collections.sort(files)
497
                } else {
498
                        HashMap<File, String> sortmetadatavalues = new HashMap<File, String>()
499
                        for (File f : files) {
500
                                String value = MetadataGetter.get(f,"text", sortMetadata)
501
                                sortmetadatavalues.put(f, value)
502
                        }
503
                        println "sort properties value: "+sortmetadatavalues
504
                        Collections.sort(files, new Comparator<File>() {
505
                                /**
506
                                 * Compare.
507
                                 *
508
                                 * @param o1 the o1
509
                                 * @param o2 the o2
510
                                 * @return the int
511
                                 */
512
                                                public int compare(Object o1, Object o2) {
513
                                                        String v1 = sortmetadatavalues.get((File)o1)
514
                                                        String v2 = sortmetadatavalues.get((File)o2)
515
                                                        if (v1 == null || v2 == null) return 0;
516
                                                        return v1.compareTo(v2)
517
                                                }
518
                                        });
519
                }
520

    
521
                // get all anatypes
522
                for (File f : files) {
523
                        getAnaTypes(f)
524
                }
525

    
526
                println("Compiling "+files.size()+" $files ")
527
                for (File f : files) {
528
                        print "."
529
                        HashMap<String, String> textmetadata;
530
                        if (metadatas != null)
531
                                textmetadata = metadatas.getTextMetadata(f)
532
                        else
533
                                textmetadata = [:]
534

    
535
                        counttext++;
536
                        if (!f.exists()) {
537
                                println("file "+f+ " does not exists")
538
                        } else {
539
                                String txtname = f.getName().substring(0,f.getName().length()-4)
540
                                builder = new compiler(f.toURI().toURL(), txtname, corpusname, "default")
541
                                builder.setLang(lang);
542
                                if (!builder.transfomFileWtc(wtcFile, textmetadata)) {
543
                                        println("Failed to compile "+f)
544
                                }
545
                        }
546
                }
547

    
548
                //end corpus
549
                if (createOutput(wtcFile)) {
550
                        output.write("</txmcorpus>\n")
551
                        output.close()
552
                }
553
                println ""
554
                //2- Import into CWB
555
                def outDir = rootDir
556

    
557
                CwbEncode cwbEn = new CwbEncode()
558
                cwbEn.setDebug(debug)
559
                CwbMakeAll cwbMa = new CwbMakeAll()
560
                cwbMa.setDebug(debug)
561

    
562
                List<String> pargs = []
563
                pargs.add("id")
564
                for (String ana : anatypes)
565
                        pargs.add(ana)
566

    
567
                String[] pAttrs = pargs
568

    
569
                structs = sattrsListener.getStructs()
570
                structsProf = sattrsListener.getProfs()
571

    
572
                if (debug) {
573
                        println structs
574
                        println structsProf
575
                }
576
                
577
                List<String> sargs = new ArrayList<String>()
578
                def tmpTextAttrs = []
579
                for (String name : structs.keySet()) {
580
                        if (name == "text") {
581
                                for (String value : structs.get(name)) // append the attributes
582
                                        tmpTextAttrs << value // added after
583
                                continue;
584
                        }
585
                        //if ( name == "q") continue; // added after
586
                        //if ( name == "foreign") continue; // added after
587
                        String concat = name+":"+structsProf.get(name); // append the depth
588
                        for (String attributeName : structs.get(name)) // append the attributes
589
                                concat += "+"+attributeName.toLowerCase();
590
                                
591
                        if (structs.get(name).size() == 0) {
592
                                concat += "+n";
593
                        } else {
594
                                if (!structs.get(name).contains("n"))
595
                                        concat += "+n"
596
                        }
597
                                
598
                        if ((name == "p" || name == "body" || name == "back" || name == "front")
599
                                 && !concat.contains("+n+") && !concat.endsWith("+n"))
600
                                concat += "+n"
601
                                
602
                        sargs.add(concat)
603
                }
604

    
605
                String textSAttributes = "text:0+id+base+project";
606
                for (String name : tmpTextAttrs) {
607
                        if (!("id".equals(name) || "base".equals(name) || "project".equals(name)))
608
                                textSAttributes += "+"+name.toLowerCase()
609
                }
610
                //                if (metadataXPath != null) {
611
                //                        for (String meta : metadataXPath.keySet()) // text property declarations from metadata.csv
612
                //                                textSAttributes+="+"+meta;
613
                //                }
614
                sargs.add(textSAttributes)
615
                sargs.add("txmcorpus:0+lang")
616

    
617
                sargs.sort()
618

    
619
                String[] sAttributes = sargs
620
                String[] pAttributes = pAttrs
621
                println "P-attributes: "+pAttributes
622
                println "S-attributes: "+sargs
623

    
624
                //if(!annotationSuccess)
625
                //pAttributes = ["id"];
626

    
627
                //println "PATTRIBUTES : "+pargs;
628
                /*
629
                 ArrayList<String> wordstag = ["w"];
630
                 println "Getting structural attributes..."
631
                 BuildCwbEncodeArgs argsgetter = new BuildCwbEncodeArgs();
632
                 HashMap<String, HashSet<String>> allStructures = new HashMap<String, HashSet<String>>();
633
                 HashMap<String, Integer> allStructuresInclusion = new HashMap<String, Integer>();
634
                 for (File srcfile: txmDir.listFiles()) {
635
                 if (!(!srcfile.getName().endsWith(".csv") && srcfile.canRead() && !srcfile.isHidden() && !srcfile.isDirectory() && ValidateXml.test(srcfile)))
636
                 continue;
637
                 print "."
638
                 argsgetter.process(srcfile, wordstag);
639
                 for (String sattr : argsgetter.getSAttributes()) {
640
                 int idx = sattr.indexOf(":");
641
                 if(idx < 0 )
642
                 continue;
643
                 String name = sattr.substring(0, idx);
644
                 if (!allStructures.containsKey(name)) {
645
                 allStructures.put(name, new HashSet<String>());
646
                 allStructuresInclusion.put(name, 0);
647
                 }
648
                 //println "sattr: "+name
649
                 String attrs = sattr.substring(idx+1);
650
                 String[] split = attrs.split("\\+");
651
                 if (split.length > 0) {
652
                 int start = 1;
653
                 try {// test if first attr is a number
654
                 int n = Integer.parseInt(split[0]);
655
                 if (n > allStructuresInclusion.get(name))
656
                 allStructuresInclusion.put(name, n);
657
                 } catch(Exception e) {start = 0;}
658
                 for (int i = start ; i < split.length ; i++)
659
                 allStructures.get(name).add(split[i]);
660
                 }
661
                 }
662
                 }
663
                 // add structures+properties found in sources
664
                 List<String> sargs = new ArrayList<String>();
665
                 for (String name : allStructuresInclusion.keySet()) {
666
                 String concat = name+":"+allStructuresInclusion.get(name);
667
                 for (String value : allStructures.get(name))
668
                 concat += "+"+value;
669
                 if (name.equals("text")) {
670
                 concat += "+base+project"
671
                 if (!concat.contains("id"))
672
                 concat += "+id";
673
                 }
674
                 sargs.add(concat);
675
                 }*/
676

    
677
                //                for (int i = 0 ; i < sargs.size() ; i++) {
678
                //                        if (sargs.get(i).startsWith("text:")) {
679
                //                                String str = sargs.get(i);
680
                //                                sargs.set(i, "text:"+str.substring(6));
681
                //                        }
682
                //                }
683

    
684
                //                String textSAttributes = "text:0+id+base+project";
685
                //                if (metadatas != null) {
686
                //                        for (String meta : metadatas.getHeadersList()) // text property declarations from metadata.csv
687
                //                                textSAttributes+="+"+meta;
688
                //                }
689
                //sargs.add(textSAttributes)
690
                //sargs.add("txmcorpus:0+lang")
691

    
692
                //                String[] sAttributes = sargs;
693
                //                System.out.println("\nCorpus structures: "+sAttributes);
694
                //                System.out.println("corpus word properties: "+pAttributes);
695

    
696
                try {
697
                        String regPath = outDir + "/registry/"+corpusname.toLowerCase();
698
                        cwbEn.run(new File(cwbLoc,"cwb-encode").getAbsolutePath(), 
699
                                outDir + "/data/$corpusname", 
700
                                outDir + "/wtc/"+corpusname+".wtc", 
701
                                regPath, pAttributes, sAttributes);
702
                        if (!new File(regPath).exists()) {
703
                                println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
704
                                return false;
705
                        }
706
                        cwbMa.run(new File(cwbLoc,"cwb-makeall").getAbsolutePath(), corpusname, outDir + "/registry");
707
                } catch (Exception ex) {System.out.println(ex); return false;}
708

    
709
                return true;
710
        }
711

    
712
        /**
713
         * Sets the debug flag of this compiler to true.
714
         */
715
        public void setDebug() {
                // Switch the debug flag of this instance on; there is no way to
                // switch it back off through this method.
                this.debug = true
        }
719

    
720
        /**
721
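         * Entry point for a manual test run: creates a compiler, enables debug, sets the CWB path and runs it on a hard-coded directory.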
722
         *
723
         * @param args the command-line arguments (unused)
724
         */
725
        public static void main(String[] args)
        {
                // Manual test run on a hard-coded corpus directory; args is not used.
                // "~" is expanded by the shell, not by java.io.File, so a path such as
                // "~/xml/geo" would point to a directory literally named "~" under the
                // working directory. Resolve against the user's home directory instead.
                File home = new File(System.getProperty("user.home"));
                File dir = new File(home, "xml/geo");
                def c = new compiler();
                c.setDebug();
                // setCwbPath is given a String, as in the original call.
                c.setCwbPath(new File(home, "TXM/cwb/bin").getPath());
                c.run(dir, "geo");
        }
733
}