Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / xml / compiler.groovy @ 1804

History | View | Annotate | Download (20.1 kB)

1

    
2

    
3
// Copyright © 2010-2013 ENS de Lyon.
4
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
5
// Lyon 2, University of Franche-Comté, University of Nice
6
// Sophia Antipolis, University of Paris 3.
7
// 
8
// The TXM platform is free software: you can redistribute it
9
// and/or modify it under the terms of the GNU General Public
10
// License as published by the Free Software Foundation,
11
// either version 2 of the License, or (at your option) any
12
// later version.
13
// 
14
// The TXM platform is distributed in the hope that it will be
15
// useful, but WITHOUT ANY WARRANTY; without even the implied
16
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17
// PURPOSE. See the GNU General Public License for more
18
// details.
19
// 
20
// You should have received a copy of the GNU General
21
// Public License along with the TXM platform. If not, see
22
// http://www.gnu.org/licenses.
23
//
24
//
25
//
26
// $LastChangedDate: 2016-05-26 17:42:36 +0200 (jeu. 26 mai 2016) $
27
// $LastChangedRevision: 3219 $
28
// $LastChangedBy: mdecorde $
29
//
30

    
31

    
32
package org.txm.scripts.importer.xml;
33

    
34
import java.util.ArrayList
35
import java.util.Collections
36
import org.txm.importer.cwb.BuildCwbEncodeArgs
37
import org.txm.importer.cwb.CwbEncode
38
import org.txm.importer.cwb.CwbMakeAll
39
import org.txm.scripts.importer.*
40
import org.txm.scripts.*
41
import org.txm.importer.scripts.xmltxm.*
42
import org.txm.utils.treetagger.TreeTagger
43
import org.txm.objects.*
44
import javax.xml.stream.*
45

    
46
import java.net.URL
47
import java.io.File
48
import java.util.HashMap
49
import java.util.List
50
import java.util.HashMap
51
import java.util.HashSet
52
import org.txm.metadatas.*
53
import org.txm.utils.ConsoleProgressBar
54
import org.txm.utils.io.FileCopy
55
import org.txm.searchengine.cqp.corpus.*
56

    
57
/**
58
 * The "compiler" Class of the XML/w import module.
59
 */
60
class compiler {
61
        
62
        /** When true, run() prints the collected structures and passes the flag to CwbEncode and CwbMakeAll via setDebug(). */
63
        private boolean debug= false;
64

    
65
        /** Input stream of the XML file being read; opened by the URL constructor and by getAnaTypes(). */
66
        private def inputData;
67

    
68
        /** The XMLInputFactory instance used to create the parser. */
69
        private def factory;
70

    
71
        /** The StAX reader created over inputData. */
72
        private XMLStreamReader parser;
73

    
74
        /** The dir. NOTE(review): not referenced in the visible part of this class. */
75
        private def dir;
76

    
77
        /** The UTF-8 writer on the CQP file, (re)opened by createOutput(). */
78
        private def output;
79

    
80
        /** The URL of the source XML file given to the constructor. */
81
        private def url;
82

    
83
        /** The word annotation types: @type values of the ana elements without their first character ('#'); static, filled by getAnaTypes() and reset by run(). */
84
        private static anatypes = []
85
        /** The annotation values of the current word, by annotation type; static, reset at each w start tag and by run(). */
        private static anavalues = [:]
86

    
87
        /** The anahash. NOTE(review): not referenced in the visible part of this class. */
88
        private HashMap<String, String> anahash = new HashMap<String, String>() ;
89

    
90
        /** Listener notified of the start and end tags written to the CQP file; static, reset to null by run() and (re)started by the URL constructor. */
        private static SAttributesListener sattrsListener;
91
        /** Result of sattrsListener.getStructs(), read in run() as the attribute names of each structure name. */
        private static HashMap<String, ArrayList<String>> structs;
92
        /** Result of sattrsListener.getProfs(), appended in run() after the structure name as its depth. */
        private static HashMap<String, Integer> structsProf;
93

    
94
        /** The text identifier, written as the id attribute of the text structure. */
95
        String text="";
96

    
97
        /** The base name, written as the base attribute of the text structure. */
98
        String base="";
99

    
100
        /** The text attributes. NOTE(review): only used by commented-out code in the visible part of this class. */
101
        String[] textAttributes = null;
102

    
103
        /** The language written as the lang attribute of the txmcorpus structure. */
104
        private String lang ="fr";
105

    
106
        /** Name of the text metadata used by run() to order the files; when null the files are sorted with Collections.sort(). */
        public static sortMetadata = null;
107
        /** When true, attribute values and annotation values are written in lower case. */
        public static normalizeMetadata = false;
108

    
109
        /**
         * Creates a compiler that is not bound to an input file: no stream and
         * no parser are opened. run() creates one URL-based compiler per file
         * to do the actual conversion.
         */
        public compiler() {
        }
114

    
115
        /**
         * Sets the import options. Both values are stored in static fields and
         * are therefore shared by all the compiler instances.
         *
         * @param sortmetadata name of the text metadata used by run() to order the files, or null to sort the files themselves
         * @param normalizemetadata when true, attribute values and annotation values are written in lower case
         */
        public void setOptions(String sortmetadata, boolean normalizemetadata)
        {
                normalizeMetadata = normalizemetadata
                sortMetadata = sortmetadata
        }
120

    
121
        /**
         * Instantiates a new compiler reading one XML file.
         *
         * Opens the stream and the StAX parser of the file and attaches the parser
         * to the shared SAttributesListener (created if it does not exist yet).
         * Errors are reported on the console and are not rethrown: the parser may
         * then be left null.
         *
         * @param url the URL of the XML file to read
         * @param text the text identifier, written as the id attribute of the text structure
         * @param base the base name, written as the base attribute of the text structure
         * @param projectName not used by this constructor
         */
        public compiler(URL url, String text, String base, String projectName)
        {
                this.text = text
                this.base = base
                this.url = url
                try {
                        inputData = url.openStream();

                        factory = XMLInputFactory.newInstance();
                        parser = factory.createXMLStreamReader(inputData);

                        // the listener is shared by all the files of an import: run() resets it to null
                        if (sattrsListener == null)
                                sattrsListener = new SAttributesListener(parser);
                        else
                                sattrsListener.start(parser)

                } catch (XMLStreamException ex) {
                        System.out.println("XMLStreamException while parsing "+url+": "+ex);
                } catch (IOException ex) {
                        // report which file failed and why instead of a bare message
                        System.err.println("IOException while parsing "+url+": "+ex);
                }
        }
152

    
153
        /**
         * Sets the language of the corpus, written by run() as the lang
         * attribute of the txmcorpus structure.
         *
         * @param lang the language
         * @return the assigned value (the method declares no return type, so the value of its last expression is returned)
         */
        public setLang(String lang)
        {
                return (this.lang = lang)
        }
163

    
164
        /** The annotation success flag. NOTE(review): only read by commented-out code in the visible part of run(). */
        boolean annotationSuccess = false

        /**
         * Sets the annotation success flag.
         *
         * @param val the new value of the flag
         */
        public void setAnnotationSuccess(boolean val)
        {
                annotationSuccess = val
        }
176

    
177
        /**
         * Opens the UTF-8 writer "output" on the given file. If the file
         * already exists it is opened in append mode, otherwise it is created.
         *
         * @param f the file to write to
         * @return true if the writer has been opened, false if an error occurred (the error is printed on the standard error stream)
         */
        private boolean createOutput(File f){
                boolean opened
                try {
                        def fileStream = new FileOutputStream(f, f.exists())
                        def buffered = new BufferedOutputStream(fileStream)
                        output = new OutputStreamWriter(buffered, "UTF-8")
                        opened = true
                } catch (Exception e) {
                        System.err.println(e)
                        opened = false
                }
                return opened
        }
193

    
194
        /**
         * Moves the parser forward until the end tag of the teiHeader element
         * has been read, or until the end of the document is reached.
         */
        private void GoToText()
        {
                while (true) {
                        int event = parser.next()
                        if (event == XMLStreamConstants.END_DOCUMENT) {
                                return
                        }
                        if (event != XMLStreamConstants.END_ELEMENT) {
                                continue
                        }
                        if (parser.getLocalName().equals("teiHeader")) {
                                return
                        }
                }
        }
205

    
206
        /**
         * Converts the XML file of this compiler and appends the result to the CQP file.
         *
         * Each w element produces one line: the form, the word id, then one column
         * per annotation type of anatypes (empty when the word has no value for the
         * type), separated by tabulations. The text element is written with the id,
         * base and project attributes followed by its own attributes (except id).
         * The other elements are written as tags with their attributes; an element
         * without attribute receives a n attribute counting the previous elements of
         * the same name without attribute. Once the tei element has been seen, the
         * elements are skipped until the text element is found.
         *
         * The writer, the parser and the input stream are closed in all cases when
         * the method returns. If an error occurs the source file is copied to the
         * "compiler-error" directory created next to the CQP file.
         * NOTE(review): what was already written for the failed text stays in the CQP file.
         *
         * @param project the project, its name is written as the project attribute of the text
         * @param cqpFile the CQP file to append to
         * @param textmetadata the text metadata (not written: the corresponding code is disabled)
         * @return true, if successful
         */
        public boolean transfomFileCqp(Project project, File cqpFile, HashMap<String, String> textmetadata)
        {
                String vAna = "";
                String vForm = "";
                String wordid= "";

                def ncounts = [:] // contains the n values per tags with no attribute

                boolean flagForm = false;
                boolean flagAna = false;

                String anatype = "";
                String anavalue = "";
                boolean foundtei = false;
                boolean foundtext = false;
                try {
                        // inside the try block so that the parser and the input stream opened by
                        // the constructor are released even if the output cannot be opened
                        if (!createOutput(cqpFile))
                                return false;

                        String localname;
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
                        {
                                switch (event) {
                                        case XMLStreamConstants.START_ELEMENT:
                                                localname = parser.getLocalName().toLowerCase();
                                                if ("tei".equals(localname)) foundtei = true;
                                                switch (localname) {
                                                        case "text":
                                                                sattrsListener.startElement(localname);
                                                                foundtext = true;
                                                                output.write("<text id=\""+text+"\" base=\""+base+"\"" + " project=\""+project.getName()+"\"");
                                                                // the id attribute is already written with the text name
                                                                writeCurrentAttributes(true);
                                                                output.write(">\n");
                                                                break;

                                                        case "w":
                                                                for (int i = 0 ; i < parser.getAttributeCount(); i++) {
                                                                        if (parser.getAttributeLocalName(i).equals("id")) {
                                                                                wordid = parser.getAttributeValue(i);
                                                                        }
                                                                }
                                                                anavalues = [:];
                                                                break;

                                                        case "form":
                                                                flagForm = true;
                                                                vForm = "";
                                                                vAna ="";
                                                                break;

                                                        case "ana":
                                                                flagAna = true;
                                                                anavalue = "";
                                                                for (int i = 0 ; i < parser.getAttributeCount(); i++)
                                                                        if ("type".equals(parser.getAttributeLocalName(i))) {
                                                                                anatype = parser.getAttributeValue(i).substring(1);//remove the #
                                                                                break;
                                                                        }
                                                                break;

                                                        default:
                                                                // skip what is between the tei start tag and the text start tag
                                                                if (foundtei && !foundtext) break;

                                                                sattrsListener.startElement(localname);
                                                                output.write("<"+localname);
                                                                writeCurrentAttributes(false);
                                                                if (parser.getAttributeCount() == 0) { // add the n attribute
                                                                        if (!ncounts.containsKey(localname)) ncounts.put(localname, 0);
                                                                        int ncount = ncounts.get(localname);
                                                                        ncounts.put(localname, ncount+1);
                                                                        output.write(" n=\""+ncount+"\"")
                                                                }
                                                                output.write(">\n");
                                                }
                                                break;

                                        case XMLStreamConstants.END_ELEMENT:
                                                localname = parser.getLocalName().toLowerCase();
                                                switch (localname) {
                                                        case "w":
                                                                // one column per annotation type, in the order of anatypes
                                                                for (String type : anatypes) {
                                                                        def v = anavalues.get(type);
                                                                        if (v != null) vAna +="\t"+v;
                                                                        else vAna +="\t";
                                                                }
                                                                vForm = vForm.replaceAll("\n", "").replaceAll("&", "&amp;").replaceAll("<", "&lt;");
                                                                output.write(vForm+"\t"+wordid+vAna+"\n");
                                                                vAna = "";
                                                                vForm = "";
                                                                break;

                                                        case "tei":
                                                                break;
                                                        case "form":
                                                                flagForm = false;
                                                                break;
                                                        case "ana":
                                                                anavalues.put(anatype, anavalue)
                                                                flagAna = false;
                                                                break;
                                                        default:
                                                                if (foundtei && !foundtext) break;

                                                                sattrsListener.endElement(localname);
                                                                output.write("</"+localname+">\n");
                                                }
                                                break;

                                        case XMLStreamConstants.CHARACTERS:
                                                if (flagForm)
                                                        vForm += parser.getText().trim();
                                                if (flagAna) {
                                                        if (normalizeMetadata)
                                                                anavalue += parser.getText().trim().toLowerCase();
                                                        else
                                                                anavalue += parser.getText().trim();
                                                }
                                                break;
                                }
                        }
                        // closed here, and not only in the finally block, so that a failure
                        // while flushing the last lines is reported as an error
                        output.close();
                        return true;
                } catch (Exception ex) {
                        System.out.println("Exception while parsing " + inputData+" of Text "+text+": "+ex);
                        copySourceToErrorDir(cqpFile);
                        return false;
                } finally {
                        closeCompilerStreams();
                }
        }

        /**
         * Writes the attributes of the current start element as name="value" pairs:
         * names in lower case, double quotes of the values replaced with &quot; and
         * values in lower case if normalizeMetadata is set.
         *
         * @param skipId when true the attribute named "id" is not written
         */
        private void writeCurrentAttributes(boolean skipId)
        {
                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                        String attrname = parser.getAttributeLocalName(i);
                        String attrvalue = parser.getAttributeValue(i).replaceAll("\"", "&quot;")
                        if (normalizeMetadata)
                                attrvalue = attrvalue.toLowerCase();
                        if (skipId && "id".equals(attrname))
                                continue;
                        output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+"\"")
                }
        }

        /**
         * Copies the source XML file to the "compiler-error" directory created next
         * to the CQP file. Best effort: a failure is only reported on the console.
         *
         * @param cqpFile the CQP file, its parent directory receives the "compiler-error" directory
         */
        private void copySourceToErrorDir(File cqpFile)
        {
                File xmlFile = null
                File errorDir = null
                try {
                        // URL.getFile() returns the percent-encoded path (e.g. "%20" for a space),
                        // going through the URI gives the real path of the file
                        xmlFile = new File(url.toURI())
                        errorDir = new File(cqpFile.getParentFile(), "compiler-error")
                        println "Warning: Moving $xmlFile to $errorDir"
                        errorDir.mkdir();
                        FileCopy.copy(xmlFile, new File(errorDir, xmlFile.getName()))
                } catch(Exception eCopy) {
                        println "Error while moving "+url+" to "+errorDir+": "+eCopy
                }
        }

        /**
         * Closes the writer, the parser and the input stream if they are opened.
         * Errors are ignored so that each resource gets a chance to be closed.
         */
        private void closeCompilerStreams()
        {
                try {
                        if (output != null) output.close();
                } catch (Exception ignored) { }
                try {
                        if (parser != null) parser.close();
                } catch (Exception ignored) { }
                try {
                        if (inputData != null) inputData.close();
                } catch (Exception ignored) { }
        }
408

    
409
        /**
         * Reads an XML file and registers in the shared anatypes list the
         * annotation types it declares: the value of the type attribute of each
         * ana element, without its first character (the '#'). Types already
         * registered are not added again.
         *
         * The parser and the input stream are closed even if the file cannot be
         * read; the reading error itself is thrown to the caller and, in that case,
         * no type of the file is registered.
         *
         * @param xmlFile the XML file to read
         */
        private void getAnaTypes(File xmlFile) {
                String ana = "ana"
                HashSet<String> types = new HashSet<String>();
                // forget the resources of a previous file so that the finally block only closes ours
                parser = null;
                inputData = null;
                try {
                        inputData = xmlFile.toURI().toURL().openStream();
                        factory = XMLInputFactory.newInstance();
                        parser = factory.createXMLStreamReader(inputData);
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                if (event != XMLStreamConstants.START_ELEMENT) continue; // start elem only
                                if (!ana.equals(parser.getLocalName())) continue; // ana elem only

                                for (int i = 0 ; i < parser.getAttributeCount(); i++) { // find @type
                                        if ("type".equals(parser.getAttributeLocalName(i))) { // @type
                                                types.add(parser.getAttributeValue(i).substring(1)); //remove the #
                                                break;
                                        }
                                }
                        }
                } finally {
                        // closing the parser does not close the stream it reads: close both
                        try {
                                if (parser != null) parser.close();
                        } catch (Exception ignored) { }
                        try {
                                if (inputData != null) inputData.close();
                        } catch (Exception ignored) { }
                }

                for (String type : types)
                        if (!anatypes.contains(type))
                                anatypes << type
        }
435

    
436
        /**
437
         * Run.
438
         *
439
         * @param rootDirFile the root dir file
440
         * @param basename the basename
441
         * @param textAttributes the text attributes
442
         * @param srcfiles the srcfiles
443
         * @return true, if successful
444
         */
445
        public boolean run(Project project, File binDir, File txmDir, String corpusname, String[] textAttributes, def srcfiles, Metadatas metadatas)
446
        {
447
                sattrsListener = null; // reset SAttribute Listener for each new import
448
                String rootDir = binDir.getAbsolutePath();
449
                anatypes = [] // reset
450
                anavalues = [:] // reset
451
                
452
                if (!(CwbEncode.isExecutableAvailable() && CwbMakeAll.isExecutableAvailable())) {
453
                        println ("Error: CWB executables rights are not well setted.")
454
                        return false;
455
                }
456
                
457
                CorpusBuild corpus = project.getCorpusBuild(project.getName(), MainCorpus.class);
458
                if (corpus != null) {
459
                        if (project.getDoUpdate()) {
460
                                corpus.clean(); // remove old files
461
                        } else {
462
                                corpus.delete(); // remove old files and TXMResult children
463
                        }
464
                } else {
465
                        corpus = new MainCorpus(project);
466
                        corpus.setID(project.getName());
467
                        corpus.setName(project.getName());
468
                }
469
                corpus.setDescription("Built with the XML/w import module");
470
                
471
                File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp");
472
cqpFile.delete()
473

    
474
                new File(binDir,"cqp").mkdirs()
475
                new File(binDir,"data").mkdirs()
476
                new File(binDir,"registry").mkdirs()
477

    
478
                String textid = ""
479
                int counttext = 0
480
                List<File> files = txmDir.listFiles(new FileFilter() {
481
                        public boolean accept(File f) {
482
                                return !f.isDirectory() && !f.isHidden() && f.getName().endsWith(".xml");
483
                        }
484
                });
485
                //1- Transform into CQP file
486
                def builder = null
487

    
488
                //start corpus
489
                if (createOutput(cqpFile)) {
490
                        output.write("<txmcorpus lang=\""+lang+"\">\n")
491
                        output.close()
492
                }
493

    
494
                // sort files
495
                if (sortMetadata == null) {
496
                        Collections.sort(files)
497
                } else {
498
                        HashMap<File, String> sortmetadatavalues = new HashMap<File, String>()
499
                        for (File f : files) {
500
                                String value = MetadataGetter.get(f,"text", sortMetadata)
501
                                sortmetadatavalues.put(f, value)
502
                        }
503
                        println "sort properties value: "+sortmetadatavalues
504
                        Collections.sort(files, new Comparator<File>() {
505
                                /**
506
                                 * Compare.
507
                                 *
508
                                 * @param o1 the o1
509
                                 * @param o2 the o2
510
                                 * @return the int
511
                                 */
512
                                                public int compare(Object o1, Object o2) {
513
                                                        String v1 = sortmetadatavalues.get((File)o1)
514
                                                        String v2 = sortmetadatavalues.get((File)o2)
515
                                                        if (v1 == null || v2 == null) return 0;
516
                                                        return v1.compareTo(v2)
517
                                                }
518
                                        });
519
                }
520

    
521
                // get all anatypes
522
                for (File f : files) {
523
                        getAnaTypes(f)
524
                }
525

    
526
                println("Compiling "+files.size()+" files ")
527
                ConsoleProgressBar cpb = new ConsoleProgressBar(files.size())
528
                for (File f : files) {
529
                        cpb.tick()
530
                        HashMap<String, String> textmetadata;
531
                        if (metadatas != null)
532
                                textmetadata = metadatas.getTextMetadata(f)
533
                        else
534
                                textmetadata = [:]
535

    
536
                        counttext++;
537
                        if (!f.exists()) {
538
                                println("file "+f+ " does not exists")
539
                        } else {
540
                                String txtname = f.getName().substring(0,f.getName().length()-4)
541
                                builder = new compiler(f.toURI().toURL(), txtname, corpusname, "default")
542
                                builder.setLang(lang);
543
                                if (!builder.transfomFileCqp(project, cqpFile, textmetadata)) {
544
                                        println("Failed to compile "+f)
545
                                }
546
                        }
547
                }
548
                cpb.done()
549
                
550
                //end corpus
551
                if (createOutput(cqpFile)) {
552
                        output.write("</txmcorpus>\n")
553
                        output.close()
554
                }
555
                println ""
556
                //2- Import into CWB
557
                def outDir = rootDir
558

    
559
                CwbEncode cwbEn = new CwbEncode()
560
                cwbEn.setDebug(debug)
561
                CwbMakeAll cwbMa = new CwbMakeAll()
562
                cwbMa.setDebug(debug)
563

    
564
                List<String> pargs = []
565
                pargs.add("id")
566
                for (String ana : anatypes)
567
                        pargs.add(ana)
568

    
569
                String[] pAttrs = pargs
570

    
571
                structs = sattrsListener.getStructs()
572
                structsProf = sattrsListener.getProfs()
573

    
574
                if (debug) {
575
                        println structs
576
                        println structsProf
577
                }
578
                
579
                List<String> sargs = new ArrayList<String>()
580
                def tmpTextAttrs = []
581
                for (String name : structs.keySet()) {
582
                        if (name == "text") {
583
                                for (String value : structs.get(name)) // append the attributes
584
                                        tmpTextAttrs << value // added after
585
                                continue;
586
                        }
587
                        //if ( name == "q") continue; // added after
588
                        //if ( name == "foreign") continue; // added after
589
                        String concat = name+":"+structsProf.get(name); // append the depth
590
                        for (String attributeName : structs.get(name)) // append the attributes
591
                                concat += "+"+attributeName.toLowerCase();
592
                                
593
                        if (structs.get(name).size() == 0) {
594
                                concat += "+n";
595
                        } else {
596
                                if (!structs.get(name).contains("n"))
597
                                        concat += "+n"
598
                        }
599
                                
600
                        if ((name == "p" || name == "body" || name == "back" || name == "front")
601
                                 && !concat.contains("+n+") && !concat.endsWith("+n"))
602
                                concat += "+n"
603
                                
604
                        sargs.add(concat)
605
                }
606

    
607
                String textSAttributes = "text:0+id+base+project";
608
                for (String name : tmpTextAttrs) {
609
                        if (!("id".equals(name) || "base".equals(name) || "project".equals(name)))
610
                                textSAttributes += "+"+name.toLowerCase()
611
                }
612
                //                if (metadataXPath != null) {
613
                //                        for (String meta : metadataXPath.keySet()) // text property declarations from metadata.csv
614
                //                                textSAttributes+="+"+meta;
615
                //                }
616
                sargs.add(textSAttributes)
617
                sargs.add("txmcorpus:0+lang")
618

    
619
                sargs.sort()
620

    
621
                String[] sAttributes = sargs
622
                String[] pAttributes = pAttrs
623
                println "P-attributes: "+pAttributes
624
                println "S-attributes: "+sargs
625

    
626
                //if(!annotationSuccess)
627
                //pAttributes = ["id"];
628

    
629
                //println "PATTRIBUTES : "+pargs;
630
                /*
631
                 ArrayList<String> wordstag = ["w"];
632
                 println "Getting structural attributes..."
633
                 BuildCwbEncodeArgs argsgetter = new BuildCwbEncodeArgs();
634
                 HashMap<String, HashSet<String>> allStructures = new HashMap<String, HashSet<String>>();
635
                 HashMap<String, Integer> allStructuresInclusion = new HashMap<String, Integer>();
636
                 for (File srcfile: txmDir.listFiles()) {
637
                 if (!(!srcfile.getName().endsWith(".csv") && srcfile.canRead() && !srcfile.isHidden() && !srcfile.isDirectory() && ValidateXml.test(srcfile)))
638
                 continue;
639
                 print "."
640
                 argsgetter.process(srcfile, wordstag);
641
                 for (String sattr : argsgetter.getSAttributes()) {
642
                 int idx = sattr.indexOf(":");
643
                 if(idx < 0 )
644
                 continue;
645
                 String name = sattr.substring(0, idx);
646
                 if (!allStructures.containsKey(name)) {
647
                 allStructures.put(name, new HashSet<String>());
648
                 allStructuresInclusion.put(name, 0);
649
                 }
650
                 //println "sattr: "+name
651
                 String attrs = sattr.substring(idx+1);
652
                 String[] split = attrs.split("\\+");
653
                 if (split.length > 0) {
654
                 int start = 1;
655
                 try {// test if first attr is a number
656
                 int n = Integer.parseInt(split[0]);
657
                 if (n > allStructuresInclusion.get(name))
658
                 allStructuresInclusion.put(name, n);
659
                 } catch(Exception e) {start = 0;}
660
                 for (int i = start ; i < split.length ; i++)
661
                 allStructures.get(name).add(split[i]);
662
                 }
663
                 }
664
                 }
665
                 // add structures+properties found in sources
666
                 List<String> sargs = new ArrayList<String>();
667
                 for (String name : allStructuresInclusion.keySet()) {
668
                 String concat = name+":"+allStructuresInclusion.get(name);
669
                 for (String value : allStructures.get(name))
670
                 concat += "+"+value;
671
                 if (name.equals("text")) {
672
                 concat += "+base+project"
673
                 if (!concat.contains("id"))
674
                 concat += "+id";
675
                 }
676
                 sargs.add(concat);
677
                 }*/
678

    
679
                //                for (int i = 0 ; i < sargs.size() ; i++) {
680
                //                        if (sargs.get(i).startsWith("text:")) {
681
                //                                String str = sargs.get(i);
682
                //                                sargs.set(i, "text:"+str.substring(6));
683
                //                        }
684
                //                }
685

    
686
                //                String textSAttributes = "text:0+id+base+project";
687
                //                if (metadatas != null) {
688
                //                        for (String meta : metadatas.getHeadersList()) // text property declarations from metadata.csv
689
                //                                textSAttributes+="+"+meta;
690
                //                }
691
                //sargs.add(textSAttributes)
692
                //sargs.add("txmcorpus:0+lang")
693

    
694
                //                String[] sAttributes = sargs;
695
                //                System.out.println("\nCorpus structures: "+sAttributes);
696
                //                System.out.println("corpus word properties: "+pAttributes);
697

    
698
                try {
699
                        String regPath = outDir + "/registry/"+corpusname.toLowerCase();
700
                        cwbEn.run(
701
                                outDir + "/data/$corpusname", 
702
                                outDir + "/cqp/"+corpusname+".cqp", 
703
                                regPath, pAttributes, sAttributes);
704
                        if (!new File(regPath).exists()) {
705
                                println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
706
                                return false;
707
                        }
708
                        cwbMa.run(corpusname, outDir + "/registry");
709
                } catch (Exception ex) {System.out.println(ex); return false;}
710

    
711
                return true;
712
        }
713

    
714
        /**
         * Switches this compiler into debug mode.
         *
         * Takes no argument: every call sets the debug flag to true, and
         * this method offers no way to turn it off again.
         */
        public void setDebug() {
                debug = true
        }
721

    
722
        /**
         * Manual test entry point.
         *
         * Creates a compiler, enables debug mode, sets the CWB path to
         * TXM/cwb/bin under the current user's home directory, then calls
         * run() on the xml/geo directory of that same home directory with
         * the name "geo".
         *
         * @param args the command-line arguments (not used)
         */
        public static void main(String[] args)
        {
                // "~" is only expanded by a shell; java.io.File would treat it as a
                // literal directory name, so resolve the home directory explicitly.
                String home = System.getProperty("user.home");
                File dir = new File(home, "xml/geo");
                def c = new compiler();
                c.setDebug();
                c.setCwbPath(new File(home, "TXM/cwb/bin").getPath());
                c.run(dir,"geo");
        }
735
}