Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / XMLTXM2WTC.groovy @ 2473

History | View | Annotate | Download (13 kB)

1 881 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 881 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 881 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 881 mdecorde
// Sophia Antipolis, University of Paris 3.
5 1094 mdecorde
//
6 881 mdecorde
// The TXM platform is free software: you can redistribute it
7 881 mdecorde
// and/or modify it under the terms of the GNU General Public
8 881 mdecorde
// License as published by the Free Software Foundation,
9 881 mdecorde
// either version 2 of the License, or (at your option) any
10 881 mdecorde
// later version.
11 1094 mdecorde
//
12 881 mdecorde
// The TXM platform is distributed in the hope that it will be
13 881 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 881 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 881 mdecorde
// PURPOSE. See the GNU General Public License for more
16 881 mdecorde
// details.
17 1094 mdecorde
//
18 881 mdecorde
// You should have received a copy of the GNU General
19 881 mdecorde
// Public License along with the TXM platform. If not, see
20 881 mdecorde
// http://www.gnu.org/licenses.
21 1094 mdecorde
//
22 1094 mdecorde
//
23 1094 mdecorde
//
24 881 mdecorde
// $LastChangedDate: 2017-04-11 15:30:35 +0200 (mar. 11 avril 2017) $
25 881 mdecorde
// $LastChangedRevision: 3426 $
26 1094 mdecorde
// $LastChangedBy: mdecorde $
27 881 mdecorde
//
28 1000 mdecorde
package org.txm.scripts.importer
29 881 mdecorde
30 881 mdecorde
import java.text.DateFormat;
31 881 mdecorde
import java.util.Date;
32 881 mdecorde
import java.util.ArrayList;
33 881 mdecorde
import java.util.HashMap;
34 881 mdecorde
import java.util.LinkedHashMap;
35 881 mdecorde
import javax.xml.stream.*;
36 881 mdecorde
import java.net.URL;
37 881 mdecorde
import org.txm.importer.filters.*;
38 881 mdecorde
// TODO: Auto-generated Javadoc
39 881 mdecorde
40 881 mdecorde
/**
41 881 mdecorde
 * The Class XMLTXM2CQP.
42 881 mdecorde
 *
43 881 mdecorde
 * @author mdecorde
44 881 mdecorde
 * Simple transformation of an XML-TEI-TXM file into a CQP file.
45 881 mdecorde
 */
46 881 mdecorde
47 881 mdecorde
class XMLTXM2CQP
48 881 mdecorde
{
49 1094 mdecorde
50 881 mdecorde
        /** The url. */
51 881 mdecorde
        private def url;
52 1094 mdecorde
53 881 mdecorde
        /** The input data. */
54 881 mdecorde
        private def inputData;
55 1094 mdecorde
56 881 mdecorde
        /** The factory. */
57 881 mdecorde
        private def factory;
58 1094 mdecorde
59 881 mdecorde
        /** The parser. */
60 881 mdecorde
        private XMLStreamReader parser;
61 1094 mdecorde
62 881 mdecorde
        /** The output. */
63 881 mdecorde
        private def output;
64 1094 mdecorde
65 881 mdecorde
        /** Insertion-ordered maps holding the txm:form values, the txm:ana values and the word attributes. */
66 881 mdecorde
        LinkedHashMap<String, String> anahash = new LinkedHashMap<String, String>();
67 881 mdecorde
        LinkedHashMap<String, String> formhash = new LinkedHashMap<String, String>();
68 881 mdecorde
        LinkedHashMap<String, String> wordattributes = new LinkedHashMap<String, String>();
69 1094 mdecorde
70 881 mdecorde
        /** The balisesfound. */
71 881 mdecorde
        HashMap<String, List<String>> balisesfound;// = new HashMap<String, List<String>>();
72 881 mdecorde
73 881 mdecorde
        /** The balises to keep. */
74 881 mdecorde
        List<String> balisesToKeep;
75 1094 mdecorde
76 881 mdecorde
        /** The send to p attributes. */
77 881 mdecorde
        HashMap <String, List<String>> sendToPAttributes;// = new HashMap<String, List<String>>();
78 1094 mdecorde
79 881 mdecorde
        /** The injected p attributes. */
80 881 mdecorde
        List<String> injectedPAttributes = new ArrayList<String>();
81 1094 mdecorde
82 881 mdecorde
        /** The default reference : a pattern + the properties to use */
83 881 mdecorde
        List<String> defaultReferences = new ArrayList<String>();
84 881 mdecorde
        String defaultReferencePattern;
85 1094 mdecorde
86 881 mdecorde
        /** The injected p attributes values. */
87 881 mdecorde
        HashMap <String, String> injectedPAttributesValues;// = new ArrayList<String>();
88 1094 mdecorde
89 881 mdecorde
        /** The addinfos. */
90 881 mdecorde
        boolean addinfos = false;
91 1094 mdecorde
92 881 mdecorde
        /** The txtname. */
93 881 mdecorde
        String txtname;
94 1094 mdecorde
95 881 mdecorde
        /** The base. */
96 881 mdecorde
        String base;
97 1094 mdecorde
98 881 mdecorde
        /** The project. */
99 881 mdecorde
        String project;
100 1094 mdecorde
101 881 mdecorde
        /** The lang. */
102 881 mdecorde
        public String lang= "fr";
103 881 mdecorde
        public String currentForm;
104 881 mdecorde
        public String currentAna;
105 1094 mdecorde
106 881 mdecorde
        /**
         * Stores the language code in the {@code lang} field of this converter.
         *
         * @param lang the language code to keep
         * @return the value that was just assigned (the method is untyped, so the
         *         assignment result is what the caller receives)
         */
        public setLang(String lang)
        {
                return (this.lang = lang);
        }
116 1094 mdecorde
117 881 mdecorde
        /**
         * Creates a converter reading the XML-TXM document found at the given URL.
         *
         * Opens the URL stream and creates the StAX reader used by
         * {@link #transformFile(File)}. Failures are reported on standard output and
         * NOT rethrown: in that case {@code parser} stays null and a later call to
         * {@code transformFile} fails and returns false.
         *
         * @param url the location of the XML-TXM file to read
         */
        public XMLTXM2CQP(URL url){
                this.url = url;
                try {
                        inputData = url.openStream();
                        factory = XMLInputFactory.newInstance();
                        parser = factory.createXMLStreamReader(inputData);
                } catch (XMLStreamException ex) {
                        System.out.println("XMLStreamException while opening "+url+": "+ex);
                        // the stream was opened but no reader owns it: release it here
                        if (inputData != null) {
                                try {
                                        inputData.close();
                                } catch (IOException ignored) {
                                        // nothing more can be done for a stream we are discarding
                                }
                                inputData = null;
                        }
                } catch (IOException ex) {
                        System.out.println("IOException while opening "+url+": "+ex);
                }
        }
137 1094 mdecorde
138 881 mdecorde
        /**
         * Registers the text identification values and switches on {@code addinfos},
         * so that {@code transformFile} adds the missing id/base/project attributes
         * to the "text" element it writes.
         *
         * @param name the text name, used as the value of the "id" attribute
         * @param base the value of the "base" attribute
         * @param project the value of the "project" attribute
         */
        public void setTextInfo(String name, String base, String project)
        {
                this.project = project;
                this.base = base;
                this.txtname = name;
                this.addinfos = true;
        }
152 1094 mdecorde
153 881 mdecorde
        /**
         * Opens the UTF-8 writer stored in {@code output}.
         *
         * When the target file already exists the writer appends to it, otherwise
         * the file is created.
         *
         * @param outfile the file to write to
         * @return true if the writer was opened, false if any exception occurred
         *         (the exception is printed on standard error)
         */
        private boolean createOutput(File outfile)
        {
                try {
                        boolean append = outfile.exists();
                        def stream = new FileOutputStream(outfile, append);
                        def writer = new OutputStreamWriter(stream, "UTF-8");
                        output = new BufferedWriter(writer);
                } catch (Exception e) {
                        System.err.println(e);
                        return false;
                }
                return true;
        }
169 1094 mdecorde
170 881 mdecorde
        /** The haspb. */
171 881 mdecorde
        boolean haspb = false;
172 1094 mdecorde
173 881 mdecorde
        /** The haslb. */
174 881 mdecorde
        boolean haslb = false;
175 1094 mdecorde
176 881 mdecorde
        /**
         * Reads the XML-TXM document and writes its CQP representation to a file.
         *
         * Output layout, one item per line:
         * <ul>
         * <li>every element whose lowercased local name is in {@code balisesToKeep}
         *     (other than w, form and ana) is written as an opening tag with its
         *     attributes, and as a closing tag when it ends;</li>
         * <li>every {@code w} element is written as a tab-separated line: the default
         *     form, the other forms, the word attributes (the word id), the
         *     structure attributes injected through {@code sendToPAttributes}, and
         *     the {@code ana} values.</li>
         * </ul>
         * The elements and attributes met are recorded in {@code balisesfound}.
         *
         * The writer, the StAX reader and the input stream are always released
         * before returning once the output has been opened, so an instance can
         * only be transformed once.
         *
         * @param outfile the CQP file to write (appended to if it already exists)
         * @return true if successful; false if no element to keep was defined, if
         *         the output could not be opened, or if any exception occurred
         */
        public boolean transformFile(File outfile)
        {
                if (balisesToKeep == null) {
                        println "no element has been defined to be keeped"
                        return false;
                }

                haspb = false;
                haslb = false;

                boolean flagAna = false;  // true while inside an <ana> that carries a type
                boolean flagForm = false; // true while inside a <form>
                String vWord = "";
                String vForm = "";
                String vAna = "";

                wordattributes = [:];
                balisesfound = new HashMap<String, List<String>>();

                if (!createOutput(outfile))
                        return false;

                if (sendToPAttributes != null) {
                        // the injected p-attribute names are the concatenation tag+attribute
                        for (String tag : sendToPAttributes.keySet())
                                for (String attr : sendToPAttributes.get(tag))
                                        injectedPAttributes.add(tag+attr);
                        injectedPAttributesValues = [:];
                }

                //output.write("<txmcorpus lang=\""+lang+"\">\n");
                balisesfound.put("txmcorpus",["lang"]);
                try {
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                String localname;
                                switch (event) {
                                        case XMLStreamConstants.START_ELEMENT:
                                                localname = parser.getLocalName().toLowerCase();

                                                // we will only declare found tags in cwb registry
                                                if (balisesToKeep.contains(localname)) {
                                                        if (!balisesfound.containsKey(localname)) {
                                                                balisesfound.put(localname, []);
                                                        }

                                                        List<String> attrlist = balisesfound.get(localname);
                                                        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                                                                if (!attrlist.contains(parser.getAttributeLocalName(i)))
                                                                        attrlist.add(parser.getAttributeLocalName(i));
                                                        }
                                                }

                                                switch (localname) {
                                                        case "w": // get word id !!
                                                                wordattributes.put("id", parser.getAttributeValue(null, "id"));
                                                                break;

                                                        case "form":
                                                                flagForm = true;
                                                                currentForm = parser.getAttributeValue(null, "type");
                                                                if (currentForm == null)
                                                                        currentForm = "default";
                                                                vForm = "";
                                                                break;

                                                        case "ana":
                                                                flagAna = true;
                                                                vAna = "";

                                                                currentAna = (parser.getAttributeValue(null, "type"));
                                                                if (currentAna != null)
                                                                        currentAna = currentAna.substring(1) // remove the #
                                                                else
                                                                        flagAna = false; // an <ana> without type is ignored
                                                                break;

                                                        default:

                                                                if (sendToPAttributes != null) {
                                                                        if (sendToPAttributes.keySet().contains(localname)) {
                                                                                List<String> attrs = sendToPAttributes.get(localname);
                                                                                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                                                                                        String injectedName = parser.getAttributeLocalName(i);
                                                                                        if (attrs.contains(injectedName)) {
                                                                                                // the key must be exactly the tag+attr name registered in
                                                                                                // injectedPAttributes, otherwise the value is never read back
                                                                                                injectedPAttributesValues.put(localname+injectedName, parser.getAttributeValue(i))
                                                                                        }
                                                                                }
                                                                        }
                                                                }

                                                                if (balisesToKeep.contains(localname)) {
                                                                        output.write("<"+localname);
                                                                        //write attributes
                                                                        boolean idwritten = false;
                                                                        boolean basewritten = false;
                                                                        boolean projectwritten = false;
                                                                        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                                                                                String attrname = parser.getAttributeLocalName(i).toLowerCase();
                                                                                if (attrname == "id")
                                                                                        idwritten = true;
                                                                                if (attrname == "base")
                                                                                        basewritten = true;
                                                                                if (attrname == "project")
                                                                                        projectwritten = true;

                                                                                output.write(" "+attrname+"=\""+parser.getAttributeValue(i).replace("&", "&amp;").replace("\"", "&quot;")+"\"" );
                                                                        }

                                                                        // add the text information that the source element did not provide
                                                                        if (localname.equals("text") && addinfos) {
                                                                                List<String> attrlist = balisesfound.get(localname);

                                                                                if (!idwritten) {
                                                                                        output.write(" id=\""+txtname+"\"")
                                                                                        if (!attrlist.contains("id"))
                                                                                                attrlist.add("id");
                                                                                }
                                                                                if (!basewritten) {
                                                                                        output.write(" base=\""+base+"\"");
                                                                                        if (!attrlist.contains("base"))
                                                                                                attrlist.add("base");
                                                                                }
                                                                                if (!projectwritten) {
                                                                                        output.write(" project=\""+project+"\"");
                                                                                        if (!attrlist.contains("project"))
                                                                                                attrlist.add("project");
                                                                                }
                                                                        }

                                                                        // finalize tag
                                                                        output.write(">\n");
                                                                }
                                                }
                                                break;

                                        case XMLStreamConstants.END_ELEMENT:
                                                localname = parser.getLocalName().toLowerCase();
                                                switch (localname) {
                                                        case "form":
                                                                if (flagForm)
                                                                        formhash.put(currentForm, vForm);
                                                                flagForm = false;
                                                                break;

                                                        case "ana":
                                                                if (flagAna)
                                                                        anahash.put(currentAna, vAna);
                                                                flagAna = false;
                                                                break;

                                                        case "w":
                                                                vWord = formhash.get("default").replaceAll("&", "&amp;").replaceAll("<", "&lt;"); // get default form
                                                                for (String form : formhash.keySet()) // and the others
                                                                        if (form != "default")
                                                                                vWord += "\t"+formhash.get(form);

                                                                for (String type : wordattributes.keySet()) // only word id ?
                                                                        vWord += "\t"+wordattributes.get(type)

                                                                if (sendToPAttributes != null) { // word attributes from structure properties
                                                                        for (String pattr : injectedPAttributes)
                                                                                vWord += "\t"+injectedPAttributesValues.get(pattr); // the injected attributes
                                                                }

                                                                for (String type : anahash.keySet()) // word annotations in txm:ana
                                                                        vWord += "\t"+anahash.get(type)

                                                                output.write(vWord+"\n");
                                                                vWord = "";
                                                                break;

                                                        default:
                                                                if (sendToPAttributes != null) { // reset structure properties
                                                                        if (sendToPAttributes.keySet().contains(localname)) {
                                                                                for (String attr : sendToPAttributes.get(localname)) {
                                                                                        // same tag+attr key as the one read when a word is written
                                                                                        injectedPAttributesValues.put(localname+attr, "N/A")
                                                                                }
                                                                        }
                                                                }

                                                                if (balisesToKeep.contains(localname)) {
                                                                        output.write("</"+localname+">\n");
                                                                }
                                                }
                                                break;

                                        case XMLStreamConstants.CHARACTERS:
                                                if (flagForm) {
                                                        vForm += parser.getText().trim();
                                                }
                                                if (flagAna) {
                                                        vAna += parser.getText().trim();
                                                }
                                                break;
                                }
                        }
                        //output.write("</txmcorpus>\n");
                        // closed inside the try so that a failing final flush is reported as an error
                        output.close();
                } catch (Exception ex) {
                        println "Error while parsing $url : "+ex
                        ex.printStackTrace();
                        return false;
                } finally {
                        // release everything on both the success and the error path;
                        // closing the reader does not close the stream it reads from
                        closeQuietly(output);
                        closeQuietly(parser);
                        closeQuietly(inputData);
                }
                return true;
        }

        /** Closes the given resource if it is not null, ignoring any failure of close(). */
        private static void closeQuietly(def resource)
        {
                if (resource == null)
                        return;
                try {
                        resource.close();
                } catch (Exception ignored) {
                        // best effort: the outcome of the transformation is already decided
                }
        }
398 1094 mdecorde
399 881 mdecorde
        /**
         * Lists the names of the word-level (positional) attributes, in this order:
         * the word attribute names, then the injected structure attributes (only
         * when {@code sendToPAttributes} is set), then the {@code ana} types.
         *
         * @return a new list of attribute names
         */
        public List<String> getpAttributs()
        {
                def names = [];
                names.addAll(wordattributes.keySet());
                if (sendToPAttributes != null) {
                        names.addAll(this.injectedPAttributes);
                }
                names.addAll(anahash.keySet());
                return names;
        }
422 1094 mdecorde
423 881 mdecorde
        /**
         * Builds the structure declarations from the elements recorded in
         * {@code balisesfound}: "tag:+attr1+attr2" when the element has attributes,
         * or just "tag" when it has none. The recorded map is also printed on
         * standard output.
         *
         * @return a new list with one declaration per recorded element
         */
        public List<String> getsAttributs()
        {
                println balisesfound
                def declarations = [];
                for (Map.Entry<String, List<String>> found : this.balisesfound.entrySet()) {
                        String tagName = found.getKey();
                        List<String> attrNames = found.getValue();
                        if (attrNames.size() > 0) {
                                String suffix = attrNames.collect { String attrName -> "+" + attrName }.join("");
                                declarations.add(tagName + ":" + suffix);
                        } else {
                                declarations.add(tagName);
                        }
                }
                return declarations;
        }
446 1094 mdecorde
447 881 mdecorde
        /**
         * Defines the elements (lowercase local names) kept as structures in the
         * output. A null list is rejected with a warning and the current value is
         * left untouched.
         *
         * @param balisesToKeep the element names to keep
         */
        public void setBalisesToKeep(List<String> balisesToKeep)
        {
                if (balisesToKeep == null) {
                        println("Warning: the list of elements to keep is null")
                        return;
                }
                this.balisesToKeep = balisesToKeep;
        }
459 1094 mdecorde
460 881 mdecorde
        /**
         * Sets the default reference: a pattern plus the structure properties it uses.
         *
         * The values are only stored in {@code defaultReferencePattern} and
         * {@code defaultReferences}; nothing else in this class reads them yet.
         * A null pattern is rejected with a warning and leaves both fields untouched.
         *
         * @param pattern the reference pattern
         * @param strucProperties the structure properties used by the pattern;
         *        null is stored as an empty list
         */
        public void setDefaultReference(String pattern, List<String> strucProperties)
        {
                if (pattern == null) {
                        println("Warning: the default reference pattern is null")
                        return;
                }
                this.defaultReferencePattern = pattern;
                this.defaultReferences = (strucProperties != null) ? strucProperties : new ArrayList<String>();
        }
473 1094 mdecorde
474 1094 mdecorde
475 881 mdecorde
        /**
         * Defines which structure attributes are copied onto the words: the map
         * associates an element name with the names of its attributes to inject.
         * A null map is rejected with a warning and the current value is left
         * untouched.
         *
         * @param sendus the attributes to inject, per element name
         */
        public void setSendToPAttributes(HashMap<String, List<String>> sendus)
        {
                if (sendus == null) {
                        println("Warning: the pAttributes to inject is null")
                        return;
                }
                this.sendToPAttributes = sendus;
        }
487 1094 mdecorde
488 1094 mdecorde
489 881 mdecorde
        /**
         * Manual test entry point: converts one hard-coded XML-TXM file into a CQP
         * file placed in a freshly recreated "out" directory, then prints the
         * structure and word attributes that were found.
         *
         * @param args the arguments (not used)
         */
        public static void main(String[] args) {

                String rootDir = "/home/mdecorde/TXM/corpora/CORNEILLEMOLIERETER/txm/CORNEILLEMOLIERETER";

                File srcfile = new File(rootDir,"CORNEILLEP_AGESILAS_1666.xml");
                println srcfile.exists()
                File cqpfile = new File(rootDir, "out/CORNEILLEP_AGESILAS_1666.cqp");
                new File(rootDir,"out").deleteDir()
                new File(rootDir,"out").mkdir()

                System.out.println("XMLTXM2CQP : "+srcfile+" >> "+cqpfile);
                // File.toURL() is deprecated because it does not escape illegal characters
                def builder = new XMLTXM2CQP(srcfile.toURI().toURL());
                def balises = ["text", "s"];
                builder.setBalisesToKeep(balises);
                if (!builder.transformFile(cqpfile)) {
                        println("XMLTXM2CQP : transformation failed for "+srcfile);
                        return;
                }

                println("SATTRIBUTS: "+builder.getsAttributs());
                println("PATTRIBUTS: "+builder.getpAttributs());
                return;
        }
514 881 mdecorde
}