Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / xmltxm / compiler.groovy @ 1804

History | View | Annotate | Download (13 kB)

1 321 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 321 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 321 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 321 mdecorde
// Sophia Antipolis, University of Paris 3.
5 321 mdecorde
//
6 321 mdecorde
// The TXM platform is free software: you can redistribute it
7 321 mdecorde
// and/or modify it under the terms of the GNU General Public
8 321 mdecorde
// License as published by the Free Software Foundation,
9 321 mdecorde
// either version 2 of the License, or (at your option) any
10 321 mdecorde
// later version.
11 321 mdecorde
//
12 321 mdecorde
// The TXM platform is distributed in the hope that it will be
13 321 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 321 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 321 mdecorde
// PURPOSE. See the GNU General Public License for more
16 321 mdecorde
// details.
17 321 mdecorde
//
18 321 mdecorde
// You should have received a copy of the GNU General
19 321 mdecorde
// Public License along with the TXM platform. If not, see
20 321 mdecorde
// http://www.gnu.org/licenses.
21 321 mdecorde
//
22 321 mdecorde
//
23 321 mdecorde
//
24 479 mdecorde
// $LastChangedDate: 2016-05-26 17:42:36 +0200 (jeu. 26 mai 2016) $
25 321 mdecorde
// $LastChangedRevision: 3219 $
26 321 mdecorde
// $LastChangedBy: mdecorde $
27 321 mdecorde
//
28 321 mdecorde
29 321 mdecorde
30 986 mdecorde
package org.txm.scripts.importer.xmltxm;
31 321 mdecorde
32 321 mdecorde
import java.util.ArrayList;;
33 321 mdecorde
34 321 mdecorde
import org.txm.*;
35 927 mdecorde
import org.txm.core.engines.*;
36 1000 mdecorde
import org.txm.importer.cwb.BuildCwbEncodeArgs;
37 1000 mdecorde
import org.txm.importer.cwb.CwbEncode
38 1000 mdecorde
import org.txm.importer.cwb.CwbMakeAll
39 986 mdecorde
import org.txm.scripts.importer.*;
40 1115 mdecorde
import org.txm.objects.*;
41 321 mdecorde
import org.txm.scripts.*;
42 1000 mdecorde
import org.txm.importer.scripts.xmltxm.*;
43 321 mdecorde
import org.txm.utils.treetagger.TreeTagger;
44 1115 mdecorde
import org.txm.searchengine.cqp.corpus.*
45 321 mdecorde
import javax.xml.stream.*;
46 321 mdecorde
import java.net.URL;
47 321 mdecorde
import java.io.File;
48 321 mdecorde
import java.util.HashMap;
49 321 mdecorde
import java.util.List;
50 321 mdecorde
51 321 mdecorde
/**
52 321 mdecorde
 * Compiles XML-TXM files into one CQP source file and indexes it with CwbEncode and CwbMakeAll.
53 321 mdecorde
 */
54 321 mdecorde
class compiler
55 321 mdecorde
{
56 321 mdecorde
        /** Value stored by setSortMetadata(); not read anywhere else in the visible class. */
        String sortMetadata

        /** Debug flag, forwarded to CwbEncode and CwbMakeAll by run(). */
        private boolean debug = false

        /** Input stream opened on the XML file being read. */
        private def inputData

        /** XMLInputFactory used to create the parser. */
        private def factory

        /** StAX reader over the XML file being read. */
        private XMLStreamReader parser

        /** Not used in the visible code. */
        private def dir

        /** Writer on the CQP file, opened by createOutput(). */
        private Writer output

        /** URL of the XML file given to the constructor. */
        private def url

        /** Text identifier: written as the id of the text tag and used to build missing word ids. */
        String text = ""

        /** Written as the base attribute of the text tag. */
        String base = ""

        /** Written as the project attribute of the text tag. */
        String projectName = ""

        /** Language written on the txmcorpus tag by run(); "fr" unless setLang() is called. */
        private String lang = "fr"

        /** Static (shared by all instances): lower-cased structure name -> attribute names; reset by run(). */
        private static HashMap<String, List<String>> sAttribs

        /** Static (shared by all instances): annotation types, one word column each; reset by run(). */
        private static anatypes = []

        /** Static (shared by all instances): annotation type -> value for the word being read. */
        private static anavalues = [:]
96 321 mdecorde
97 321 mdecorde
        /**
         * Creates a compiler that is not bound to any input file.
         *
         * Nothing is opened here; run() builds its own URL-bound compiler
         * for each source file it processes.
         */
        public compiler()
        {
        }
102 321 mdecorde
103 321 mdecorde
        /**
         * Instantiates a new compiler bound to one XML file.
         *
         * Opens the URL and creates the StAX reader over it. A failure is only
         * reported on standard output: the constructor still returns, with the
         * parser left unset, and any stream that was already opened is closed.
         *
         * @param url the location of the XML file to read
         * @param text the text identifier (written as the id of the text tag)
         * @param base the value written as the base attribute of the text tag
         * @param projectName the value written as the project attribute of the text tag
         */
        public compiler(URL url,String text,String base, String projectName)
        {
                this.text = text
                this.base = base
                this.projectName = projectName
                this.url = url
                try {
                        inputData = url.openStream()
                        factory = XMLInputFactory.newInstance()
                        parser = factory.createXMLStreamReader(inputData)
                } catch (Exception ex) {
                        System.out.println("Error while creating indexes: $ex");
                        ex.printStackTrace();
                        // the reader could not be created: do not leave the stream open
                        if (inputData != null) {
                                try {
                                        inputData.close()
                                } catch (Exception ignored) {
                                        // nothing more can be done for a stream that fails to close
                                }
                                inputData = null
                        }
                }
        }
127 321 mdecorde
128 321 mdecorde
        /**
         * Records the sort-metadata setting.
         *
         * The value is only stored; nothing in the visible class reads it.
         *
         * @param sortMetadata the value to store
         */
        public void setSortMetadata(String sortMetadata) {
                this.sortMetadata = sortMetadata
        }
132 321 mdecorde
133 321 mdecorde
        /**
         * Sets the language of the corpus.
         *
         * run() writes this value as the lang attribute of the txmcorpus tag.
         *
         * @param lang the language to use
         * @return the assigned value (no return type is declared, so the result
         *         of the assignment is returned implicitly)
         */
        public setLang(String lang) {
                // last expression of the method, hence also its return value
                this.lang = lang
        }
143 321 mdecorde
144 321 mdecorde
        /**
         * Opens the output writer on the given file, encoded in UTF-8.
         *
         * The file is opened in append mode when it already exists; otherwise it
         * is created. Errors are printed and reported through the return value.
         *
         * @param f the file to write to
         * @return true, if successful
         */
        private boolean createOutput(File f) {
                boolean opened = false
                try {
                        // append to an existing file, start a new one otherwise
                        boolean append = f.exists()
                        def buffered = new BufferedOutputStream(new FileOutputStream(f, append))
                        output = new OutputStreamWriter(buffered, "UTF-8")
                        opened = true
                } catch (Exception e) {
                        println "Error while create CQP otput file: "+e
                        e.printStackTrace()
                }
                return opened
        }
163 321 mdecorde
164 321 mdecorde
        /**
         * Moves the parser just past the TEI header.
         *
         * @return true as soon as the end tag of a teiHeader (or teiheader)
         *         element has been read, false if the document ends first
         */
        private boolean GoToText()
        {
                int event = parser.next()
                while (event != XMLStreamConstants.END_DOCUMENT) {
                        if (event == XMLStreamConstants.END_ELEMENT) {
                                String closed = parser.getLocalName()
                                if (closed.equals("teiHeader") || closed.equals("teiheader")) {
                                        return true
                                }
                        }
                        event = parser.next()
                }
                return false
        }
177 321 mdecorde
178 321 mdecorde
        /**
         * Appends the content of this compiler's XML file to a CQP source file.
         *
         * The TEI header is skipped first. Then:
         * - every element met outside a w element (other than w, form and ana) is
         *   written as a lower-cased tag with its attributes, and its name and
         *   attribute names are recorded in the shared sAttribs map;
         * - every w element is written as one tab-separated line: form, word id,
         *   then one column per entry of the shared anatypes list.
         *
         * The output writer, the XML reader and the input stream are closed before
         * this method returns or throws.
         *
         * @param cqpFile the CQP file to append to
         * @return true, if successful; false when the CQP file cannot be opened,
         *         when no teiHeader end tag is found, or when reading or writing fails
         */
        public boolean transfomFileCqp(File cqpFile)
        {
                try {
                        if (!createOutput(cqpFile)) {
                                // createOutput already printed the error
                                return false;
                        }
                        if (!GoToText()) {
                                println "Error: no teiHeader tag found in text '"+this.text+"' (please check file format or content)"
                                return false;
                        }
                        return writeCqpBody();
                } finally {
                        // covers the early returns and any exception thrown above
                        closeResources();
                }
        }

        /**
         * Reads the rest of the document and writes the CQP lines to the output.
         *
         * @return true if the document was read to its end and the output closed
         *         without error, false otherwise (the error is printed)
         */
        private boolean writeCqpBody()
        {
                String vAna = "";
                String vForm = "";
                String wordid = "";
                String anatype = null;
                String anavalue = null;
                boolean flagForm = false;
                boolean flagAna = false;
                boolean inW = false;
                int wcounter = 1;
                String localname = null;

                try {
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                switch (event) {
                                        case XMLStreamConstants.START_ELEMENT:
                                                localname = parser.getLocalName();
                                                switch (localname) {
                                                        case "w":
                                                                inW = true;
                                                                anavalues = [:]
                                                                wordid = parser.getAttributeValue(null, "id")
                                                                if (wordid == null) {
                                                                        // no id in the source: generate one from the text name
                                                                        wordid = "w_"+text+"_"+(wcounter++)
                                                                }
                                                                vAna = "";
                                                                break;

                                                        case "form":
                                                                String type2 = parser.getAttributeValue(null, "type");
                                                                if (type2 == null || type2.equals("default")) {
                                                                        flagForm = true;
                                                                        vForm = "";
                                                                } else {
                                                                        flagAna = true;
                                                                        vAna += "\t";
                                                                        if (!anatypes.contains(type2))
                                                                                anatypes << type2;
                                                                }
                                                                break;

                                                        case "ana":
                                                                flagAna = true;
                                                                anavalue = "";
                                                                anatype = parser.getAttributeValue(null, "type");
                                                                if (anatype != null && anatype.startsWith("#"))
                                                                        anatype = anatype.substring(1)
                                                                break;

                                                        default:
                                                                if (!inW) {
                                                                        String tag = localname.toLowerCase();
                                                                        boolean isText = (localname == "text");
                                                                        output.write("<"+tag);
                                                                        // The map is keyed by the lower-cased name, so the lookup must
                                                                        // use it too; otherwise the list of a mixed-case element would
                                                                        // be replaced by an empty one at each occurrence.
                                                                        if (!sAttribs.containsKey(tag))
                                                                                sAttribs.put(tag, []);
                                                                        List<String> known = sAttribs.get(tag);

                                                                        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                                                                                String attrname = parser.getAttributeLocalName(i).toLowerCase();
                                                                                String attrvalue = parser.getAttributeValue(i);
                                                                                // the id of a text element is replaced by the one written below
                                                                                if (!(isText && attrname == "id"))
                                                                                        output.write(" "+attrname+"=\""+attrvalue.replace("\"", "'")+"\"");

                                                                                if (!known.contains(attrname))
                                                                                        known.add(attrname)
                                                                        }

                                                                        if (isText) {
                                                                                output.write(" id=\""+text+"\" base=\""+base+"\" project=\""+projectName+"\"");
                                                                        }
                                                                        output.write(">\n");
                                                                }
                                                }
                                                break;

                                        case XMLStreamConstants.END_ELEMENT:
                                                localname = parser.getLocalName();
                                                switch (localname) {
                                                        case "TEI":
                                                                break;

                                                        case "w":
                                                                // one column per known annotation type, empty when the word has none
                                                                for (String t : anatypes) {
                                                                        def v = anavalues.get(t);
                                                                        vAna += ((v != null) ? "\t"+v : "\t");
                                                                }
                                                                output.write( vForm.replaceAll("&", "&amp;").replaceAll("<", "&lt;") +"\t"+wordid+vAna+"\n");
                                                                vAna = "";
                                                                vForm = "";
                                                                inW = false;
                                                                break;

                                                        case "form":
                                                                flagForm = false;
                                                                flagAna = false;
                                                                break;

                                                        case "ana":
                                                                anavalues.put(anatype, anavalue)
                                                                flagAna = false;
                                                                break;

                                                        default:
                                                                if (!inW)
                                                                        output.write("</"+localname.toLowerCase()+">\n");
                                                }
                                                break;

                                        case XMLStreamConstants.CHARACTERS:
                                                if (inW) {
                                                        if (flagForm) {
                                                                vForm += parser.getText().trim();
                                                        } else if (flagAna) {
                                                                anavalue += parser.getText().trim();
                                                        }
                                                }
                                                break;
                                }
                        }

                        // closed here, inside the try, so that a failing flush is reported as an error
                        output.close();
                        return true;
                } catch (Exception ex) {
                        System.out.println("Error while writing CQP file $ex");
                        ex.printStackTrace();
                        return false;
                }
        }

        /**
         * Closes the output writer, the XML reader and the input stream, ignoring
         * close failures so that each resource gets its chance to be closed.
         */
        private void closeResources()
        {
                try {
                        if (output != null) output.close();
                } catch (Exception ignored) {
                        // best effort
                }
                try {
                        if (parser != null) parser.close();
                } catch (Exception ignored) {
                        // best effort
                }
                try {
                        if (inputData != null) inputData.close();
                } catch (Exception ignored) {
                        // best effort
                }
        }
336 321 mdecorde
337 321 mdecorde
338 321 mdecorde
339 321 mdecorde
        /**
         * Builds the CQP source file from the given XML files and indexes it with
         * CwbEncode and CwbMakeAll.
         *
         * Steps: reset the shared annotation/structure tables, check that the CWB
         * executables are available, clean, delete or create the project's main
         * corpus, collect the annotation types of all files, write the CQP file
         * (txmcorpus opening tag, one call to transfomFileCqp per file, txmcorpus
         * closing tag), then run the encoder and the indexer.
         *
         * @param project the project; its name is used to look up or create the main corpus
         * @param binDir the directory receiving the cqp, data and registry sub-directories
         * @param txmDir not used by this method
         * @param basename not used by this method
         * @param corpusname the name used for the CQP file, the data directory and the registry entry
         * @param files the XML files to compile, in order; missing files are reported and skipped
         * @return true, if successful
         */
        public boolean run(Project project, File binDir, File txmDir, String basename, String corpusname, List<File> files)
        {
                anatypes = new ArrayList<String>();// init only 1 time
                anavalues = [:]
                sAttribs = new HashMap<String, List<String>>();// init only 1 time
                String rootDir = binDir.getAbsolutePath();

                if (!(CwbEncode.isExecutableAvailable() && CwbMakeAll.isExecutableAvailable())) {
                        println ("Error: CWB executables not well set.")
                        return false;
                }
                CorpusBuild corpus = project.getCorpusBuild(project.getName(), MainCorpus.class);
                if (corpus != null) {
                        if (project.getDoUpdate()) {
                                corpus.clean(); // remove old files
                        } else {
                                corpus.delete(); // remove old files and TXMResult children
                        }
                } else {
                        corpus = new MainCorpus(project);
                        corpus.setID(project.getName());
                        corpus.setName(project.getName());
                }
                corpus.setDescription("Built with the XML-TXM import module");

                File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp");
                cqpFile.delete()
                new File(binDir,"cqp").mkdirs()
                new File(binDir,"data").mkdirs()
                new File(binDir,"registry").mkdirs()

                // get all anatypes
                for (File f : files) {
                        // missing files are reported (and skipped) by the loop below;
                        // scanning them here would throw before that report is reached
                        if (f.exists())
                                getAnaTypes(f)
                }

                //0 set Lang
                if (!createOutput(cqpFile)) {
                        // without the opening tag the CQP file would be malformed
                        return false;
                }
                try {
                        output.write("<txmcorpus lang=\""+lang+"\">\n");
                } finally {
                        output.close();
                }

                //1- Transform into CQP file
                for (File f : files) {
                        if (!f.exists()) {
                                println("file "+f+ " does not exists")
                                continue;
                        }
                        // drops the last 4 characters of the name (presumably the ".xml" extension)
                        String txtname = f.getName().substring(0,f.getName().length()-4);
                        def builder = new compiler(f.toURI().toURL(), txtname, corpusname.toLowerCase(), "default");
                        builder.setLang(lang);
                        if (!builder.transfomFileCqp(cqpFile))
                                return false;
                }

                //end corpus
                if (!createOutput(cqpFile)) {
                        // without the closing tag the CQP file would be malformed
                        return false;
                }
                try {
                        output.write("</txmcorpus>\n");
                } finally {
                        output.close();
                }

                //2- Import into CWB
                CwbEncode cwbEn = new CwbEncode();
                cwbEn.setDebug(debug);
                CwbMakeAll cwbMa = new CwbMakeAll();
                cwbMa.setDebug(debug);

                // positional attributes: the word id, then one per annotation type
                List<String> pargs = ["id"];
                for (String ana : anatypes)
                        pargs.add(ana);

                // structural attributes: text and txmcorpus are always declared
                List<String> sargs = [];
                if (sAttribs.containsKey("text")) {
                        for (String attr : ["id", "base", "project"]) {
                                if (!sAttribs.get("text").contains(attr))
                                        sAttribs.get("text").add(attr);
                        }
                } else {
                        sargs.add("text:0+id+base+project")
                }

                if (sAttribs.containsKey("txmcorpus")) {
                        if (!sAttribs.get("txmcorpus").contains("lang"))
                                sAttribs.get("txmcorpus").add("lang");
                } else {
                        sargs.add("txmcorpus:0+lang")
                }

                for (String tag : this.sAttribs.keySet()) {
                        String sAttr = tag;
                        if (sAttribs.get(tag).size() > 0)
                                sAttr += ":";
                        for (String attr : sAttribs.get(tag))
                                sAttr +="+"+attr;
                        sargs.add(sAttr)
                }

                String[] sAttributes = sargs;
                String[] pAttributes = pargs;
                println "sAttributes : "+sAttributes;
                println "pAttributes : "+pAttributes;
                try {
                        String regPath = rootDir + "/registry/"+corpusname.toLowerCase()
                        cwbEn.run(
                                rootDir + "/data/$corpusname",
                                cqpFile.getAbsolutePath(),
                                regPath, pAttributes, sAttributes);
                        if (!new File(regPath).exists()) {
                                println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
                                return false;
                        }
                        cwbMa.run(corpusname, rootDir + "/registry");

                } catch (Exception ex) {
                        System.out.println("Error while creating indexes with CQP tools: $ex");
                        ex.printStackTrace();
                        return false;
                }

                return true;
        }
478 321 mdecorde
479 321 mdecorde
        /**
         * Turns the debug flag on. There is no way to turn it off again through
         * this method.
         */
        public void setDebug() {
                this.debug = true
        }
486 321 mdecorde
487 321 mdecorde
        /**
         * Manual entry point working on hard-coded paths.
         *
         * NOTE(review): setCwbPath(String) and run(File, String) are not declared
         * in the visible class, so this entry point looks stale - confirm before
         * relying on it.
         *
         * @param args the arguments (not used)
         */
        public static void main(String[] args)
        {
                def geoCompiler = new compiler()
                File geoDir = new File("~/xml/geo")

                geoCompiler.setDebug()
                geoCompiler.setCwbPath("~/TXM/cwb/bin")
                geoCompiler.run(geoDir, "geo")
        }
500 321 mdecorde
501 321 mdecorde
        /**
         * Collects the annotation types used by the ana elements of a file and
         * appends those not yet known to the shared anatypes list.
         *
         * The type of an ana element is the value of its type attribute, without
         * the leading '#' when there is one - the same rule transfomFileCqp applies
         * when it stores the annotation values, so both sides use the same keys.
         *
         * The reader and the stream opened on the file are closed before the
         * method returns or throws.
         *
         * @param xmlFile the XML file to scan
         */
        private void getAnaTypes(File xmlFile) {
                HashSet<String> types = new HashSet<String>();
                try {
                        inputData = xmlFile.toURI().toURL().openStream();
                        factory = XMLInputFactory.newInstance();
                        parser = factory.createXMLStreamReader(inputData);

                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                if (event != XMLStreamConstants.START_ELEMENT || !"ana".equals(parser.getLocalName())) {
                                        continue;
                                }
                                for (int i = 0 ; i < parser.getAttributeCount(); i++) { // find @type
                                        if ("type".equals(parser.getAttributeLocalName(i))) {
                                                String type = parser.getAttributeValue(i);
                                                // only strip a real '#' prefix: a blind substring(1) would
                                                // truncate plain type names and fail on an empty value
                                                if (type.startsWith("#"))
                                                        type = type.substring(1);
                                                types.add(type);
                                                break;
                                        }
                                }
                        }
                } finally {
                        // close on every path; a failing close must not hide the original error
                        try {
                                if (parser != null) parser.close();
                        } catch (Exception ignored) {
                                // best effort
                        }
                        try {
                                if (inputData != null) inputData.close();
                        } catch (Exception ignored) {
                                // best effort
                        }
                }

                for (String type : types)
                        if (!anatypes.contains(type))
                                anatypes << type
        }
527 321 mdecorde
}