Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / importer / scripts / xmltxm / Xml2Ana.groovy @ 1000

History | View | Annotate | Download (15.5 kB)

1 986 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 986 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 986 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 986 mdecorde
// Sophia Antipolis, University of Paris 3.
5 986 mdecorde
//
6 986 mdecorde
// The TXM platform is free software: you can redistribute it
7 986 mdecorde
// and/or modify it under the terms of the GNU General Public
8 986 mdecorde
// License as published by the Free Software Foundation,
9 986 mdecorde
// either version 2 of the License, or (at your option) any
10 986 mdecorde
// later version.
11 986 mdecorde
//
12 986 mdecorde
// The TXM platform is distributed in the hope that it will be
13 986 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 986 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 986 mdecorde
// PURPOSE. See the GNU General Public License for more
16 986 mdecorde
// details.
17 986 mdecorde
//
18 986 mdecorde
// You should have received a copy of the GNU General
19 986 mdecorde
// Public License along with the TXM platform. If not, see
20 986 mdecorde
// http://www.gnu.org/licenses.
21 986 mdecorde
//
22 986 mdecorde
//
23 986 mdecorde
//
24 986 mdecorde
// $LastChangedDate: 2017-04-19 16:23:38 +0200 (mer. 19 avril 2017) $
25 986 mdecorde
// $LastChangedRevision: 3430 $
26 986 mdecorde
// $LastChangedBy: mdecorde $
27 986 mdecorde
//
28 986 mdecorde
package org.txm.importer.scripts.xmltxm
29 986 mdecorde
30 986 mdecorde
import org.txm.importer.StaxIdentityParser;
31 986 mdecorde
32 986 mdecorde
import java.text.DateFormat;
33 986 mdecorde
import java.util.ArrayList;
34 986 mdecorde
import java.util.Date;
35 986 mdecorde
import java.util.HashMap;
36 986 mdecorde
import java.util.Locale;
37 986 mdecorde
38 986 mdecorde
import javax.xml.stream.*;
39 986 mdecorde
40 986 mdecorde
import java.net.URL;
41 986 mdecorde
42 986 mdecorde
import org.txm.importer.filters.*;
43 1000 mdecorde
import org.txm.scripts.importer.HasElement
44 1000 mdecorde
import org.txm.scripts.importer.StaxStackWriter
45 986 mdecorde
import org.txm.utils.AsciiUtils;
46 986 mdecorde
47 986 mdecorde
/**
 * Transforms a "pre" XML-TEI file into an XML-TEI-TXM file.
 *
 * The parser copies the input events to the output and, on the way:
 * <ul>
 * <li>adds the TEI root element, the teiHeader and the text element when they are missing;</li>
 * <li>rewrites every word element (see {@link #setWordTag(String)}) as
 * {@code <w><txm:form>form</txm:form><txm:ana resp="..." type="...">value</txm:ana>...</w>};</li>
 * <li>completes the teiHeader with the respStmt, application and taxonomy declarations
 * given to {@link #setHeaderInfos}.</li>
 * </ul>
 *
 * The correspondence between word attributes and ana types / respStmt ids must be
 * declared with {@link #setCorrespondances}; the matching attributes of the word
 * elements are then turned into txm:ana elements.
 *
 * @author mdecorde
 */
class Xml2Ana extends StaxIdentityParser
{
	/** Local name of the synthetic/recognized text element. */
	public static final String TEXT = "text"
	/** Local name of the id attribute. */
	public static final String ID = "id"
	/** Local name of the txm:ana element. */
	public static String ANA = "ana"
	/** Name of the resp element / attribute. */
	public static String RESP = "resp"
	/** Name of the type attribute. */
	public static String TYPE = "type"

	/** The dir (not used by this class). */
	private def dir;

	/** When true, word attributes without a declared correspondence become txm:ana elements too. */
	private boolean convertAllAttributes = false;

	/** Word attribute name -> ana type (written as "#type"). */
	HashMap<String,String> correspType;

	/** Word attribute name -> respStmt id (written as "#id"). */
	HashMap<String,String> correspRef;

	/** Header tags to watch for; the value turns true once the tag has been seen. */
	HashMap<String,Boolean> checkTags = new HashMap<String,Boolean>();

	/** The respStmt ids to declare in the header. */
	def respId = [];

	/** respStmt id -> [application ident, application version, report file (File, path or null)]. */
	HashMap<String, List> applications;

	/** respStmt id -> targets of the tagsets referenced by the application. */
	HashMap<String,String[]> taxonomies;

	/** respStmt id -> [resp text, person name, date "when" value, date text]. */
	HashMap<String,String[]> resps;

	/** taxonomy id -> (ref type -> ref target). */
	HashMap<String,HashMap<String,String>> items;

	/** The XML headeradded. */
	boolean XMLHeaderadded = false;
	/** Name of the input file without its extension; used as id of the text element. */
	String textname;
	/** Local name of the word elements. */
	String wtag = "w";

	/** Number of word elements met so far. */
	int idcount = 0;
	/** True while the content of a word element is being read. */
	boolean flagWord = false;
	/** Number of start elements met so far. */
	int firstElement = 0;
	/** True when the TEI root element was added by this parser. */
	boolean teiElementAdded = false;
	/** True when the teiHeader element was added at the start of the document. */
	boolean teiHeaderElementAdded = false;
	/** True when the document has (or has been given) a text element. */
	boolean hasText = false;
	/** True when the text element was added by this parser and must be closed in after(). */
	boolean textElementAdded = false;
	/** The [resp, type, value] triples of the word being processed. */
	def anabalises = [];

	boolean hasClassDecl = false;
	boolean hasFileDesc = false;
	boolean hasEncodingDesc = false;
	boolean hasTeiHeader = false;
	boolean hasTEI = false;

	/**
	 * Instantiates a new Xml2Ana for the given source file.
	 *
	 * The text name is the file name without its last extension, and the file is
	 * scanned once with HasElement to know whether it already contains a text element.
	 *
	 * @param file the XML file to transform
	 */
	public Xml2Ana(File file) {
		super(file.toURI().toURL());

		textname = file.getName();
		int idx = textname.lastIndexOf(".");
		if (idx > 0) {
			textname = textname.substring(0, idx)
		}

		checkTags.put("respStmt", false);
		checkTags.put("titleStmt", false);
		checkTags.put("appInfo", false);

		hasText = new HasElement(file, TEXT).process();
	}

	/**
	 * Sets whether the word attributes that have no declared correspondence are
	 * converted to txm:ana elements (true) or copied as attributes (false).
	 *
	 * @param value the new value
	 * @return the assigned value
	 */
	public setConvertAllAtrtibutes(boolean value) {
		convertAllAttributes = value;
	}

	/**
	 * Sets the local name of the elements to process as words.
	 *
	 * @param wtag the word element local name
	 * @return the assigned value
	 */
	public setWordTag(String wtag) {
		this.wtag = wtag
	}

	/**
	 * Opens the synthetic text element (with the text name as id) unless the
	 * document already has one. The element is closed in after().
	 */
	private void openTextElementIfMissing()
	{
		if (hasText) {
			return;
		}
		writer.writeStartElement(TEXT);
		writer.writeAttribute(ID, textname);
		textElementAdded = true;
		hasText = true;
	}

	/**
	 * Writes the start of a word element: the element itself, its namespaces, its
	 * id and kept attributes, then opens txm:form. The attributes to convert are
	 * stored in anabalises and written when the word ends.
	 */
	private void startWord()
	{
		openTextElementIfMissing();
		idcount++; // increment word counter
		anabalises.clear();

		writer.writeStartElement(parser.getLocalName()); // write w

		for (int i = 0 ; i < parser.getNamespaceCount() ; i++) { // write namespaces
			writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
		}

		for (int i = 0 ; i < parser.getAttributeCount() ; i++) { // transform attributes
			String type = parser.getAttributeLocalName(i);
			String value = parser.getAttributeValue(i);

			if (correspType != null && correspType.containsKey(type)) { // declared txm:ana
				String corresptype = correspType.get(type);
				String ref = correspRef.get(type);
				anabalises.add(["#"+ref, "#"+corresptype, value]);
			} else if (type == ID) { // keep id attribute
				String wordid = value
				// normalize "wXXX" ids to "w_XXX"
				if (wordid.startsWith("w") && !wordid.startsWith("w_")) {
					wordid = "w_"+wordid.substring(1)
				}
				wordid = AsciiUtils.buildAttributeId(wordid); // remove characters not compatible with the id attribute value
				writer.writeAttribute(type, wordid);
			} else if (convertAllAttributes) { // undeclared attribute converted to txm:ana
				anabalises.add(["none", "#"+type, value])
			} else { // undeclared attribute copied as is
				writer.writeAttribute(type, value);
			}
		}

		flagWord = true; // start to capture the form
		writer.writeStartElement(TXMNS, "form");
	}

	/**
	 * Handles a start element: rewrites word elements, declares the TEI and TXM
	 * namespaces on the TEI element, and adds the TEI, teiHeader and text elements
	 * when the source lacks them.
	 */
	@Override
	protected void processStartElement()
	{
		firstElement++;

		String name = parser.getLocalName();
		if (checkTags.containsKey(name)) {
			checkTags.put(name, true);
		}

		switch (name) {
			case wtag:
				startWord();
				break;

			case "TEI":
				super.processStartElement();
				boolean hasTeiNS = false;
				boolean hasTXMNs = false;
				for (int i = 0 ; i < parser.getNamespaceCount() ; i++) {
					String uri = parser.getNamespaceURI(i);
					if (uri == TXMNS) {
						hasTXMNs = true;
					} else if (uri == TEINS) {
						hasTeiNS = true;
					}
				}
				if (!hasTeiNS) {
					writer.writeDefaultNamespace(TEINS);
				}
				if (!hasTXMNs) {
					writer.writeNamespace(TXM, TXMNS);
				}
				break;

			default:
				if (TEXT.equals(localname)) {
					hasText = true;
				}

				if (firstElement == 1) { // test if first element is TEI
					if (localname != "TEI") { // "TEI" is missing
						teiElementAdded = true;
						addTEIElement();
					} else {
						// NOTE(review): "TEI" is normally caught by the case above; kept from the original
						openTextElementIfMissing();
					}
				}

				if (firstElement == 2 && !teiElementAdded && localname != "teiHeader") { // teiHeader is missing
					writeTeiHeader();
					hasTeiHeader = true
					teiHeaderElementAdded = true
				}

				// once the header has been synthesized, the text element must be opened
				// BEFORE the current element is written, whatever path added the header
				if (teiElementAdded || teiHeaderElementAdded) {
					openTextElementIfMissing();
				}

				super.processStartElement();
				if (TEXT.equals(localname)) {
					if (!parser.getAttributeValue(null, ID)) { // give an id to the text if it has none
						writer.writeAttribute(ID, textname);
					}
				}
		}
	}

	/**
	 * Closes the elements added by this parser (text, then TEI) before the
	 * inherited clean up.
	 */
	@Override
	protected void after()
	{
		if (textElementAdded) {
			writer.writeEndElement(); // text
		}
		if (teiElementAdded) {
			writer.writeEndElement(); // TEI
		}
		super.after(); // close writer, parser, etc
	}

	/**
	 * Writes the TEI start element with its namespace declarations, followed by a
	 * complete teiHeader. The TEI element is left open.
	 */
	protected void addTEIElement()
	{
		writer.writeStartElement("TEI");
		writer.writeDefaultNamespace(TEINS);
		writer.writeNamespace(TXM, TXMNS);
		writer.writeNamespace(TEI, TEINS);
		writeTeiHeader();
	}

	/**
	 * Writes the trimmed text inside a word (so the form stays on one line);
	 * delegates to the inherited behavior elsewhere.
	 */
	@Override
	protected void processCharacters()
	{
		if (flagWord) {
			writer.writeCharacters(parser.getText().trim()); // keep form in 1 line
		} else {
			super.processCharacters();
		}
	}

	/**
	 * Handles an end element: closes txm:form and writes the txm:ana elements of a
	 * word, and completes the header elements just before they are closed.
	 */
	@Override
	protected void processEndElement()
	{
		switch (parser.getLocalName()) {
			case wtag:
				writer.writeEndElement(); // txm:form
				for (def values : anabalises) { // <txm:ana resp=ref type=corresptype>value</txm:ana>
					writer.writeStartElement(TXMNS, ANA);
					writer.writeAttribute(RESP, values[0]);
					writer.writeAttribute(TYPE, values[1]);
					writer.writeCharacters(values[2]);
					writer.writeEndElement(); // txm:ana
				}
				flagWord = false;
				break;

			case "fileDesc":
				hasFileDesc = true;
				this.writeTXMResps();
				break;

			case "classDecl":
				hasClassDecl = true;
				this.writeTXMTaxonomies();
				break;

			case "encodingDesc":
				hasEncodingDesc = true;
				writeContentOfEncodingDesc();
				break;

			case "teiHeader":
				hasTeiHeader = true
				if (!hasEncodingDesc) { // the header had no encodingDesc: add one
					writer.writeStartElement("encodingDesc");
					writeContentOfEncodingDesc();
					writer.writeEndElement();
				}
				break;

			case "TEI":
				hasTEI = true;
				if (!hasTeiHeader) {
					writeTeiHeader();
				}
				break;
		}

		super.processEndElement();
	}

	/**
	 * Writes a complete teiHeader: a fileDesc holding the respStmt declarations and
	 * empty titleStmt/title, publicationStmt and sourceDesc elements, then an
	 * encodingDesc.
	 */
	protected void writeTeiHeader()
	{
		writer.writeStartElement("teiHeader");

		writer.writeStartElement("fileDesc")
		this.writeTXMResps();
		writer.writeStartElement("titleStmt")
		writer.writeStartElement("title")
		writer.writeEndElement(); // title
		writer.writeEndElement(); // titleStmt
		writer.writeStartElement("publicationStmt")
		writer.writeEndElement(); // publicationStmt
		writer.writeStartElement("sourceDesc")
		writer.writeEndElement(); // sourceDesc
		writer.writeEndElement(); // fileDesc

		writer.writeStartElement("encodingDesc");
		writeContentOfEncodingDesc();
		writer.writeEndElement(); // encodingDesc

		writer.writeEndElement(); // teiHeader
	}

	/**
	 * Writes the appInfo element and, if no classDecl end tag has been met yet,
	 * a classDecl element holding the taxonomies.
	 */
	protected void writeContentOfEncodingDesc()
	{
		writer.writeStartElement("appInfo")
		this.writeTXMApps();
		writer.writeEndElement(); // appInfo

		if (!hasClassDecl) {
			writer.writeStartElement("classDecl");
			this.writeTXMTaxonomies();
			writer.writeEndElement(); // classDecl
		}
	}

	/**
	 * Lists the watched header tags (see checkTags) that have been encountered so far.
	 *
	 * @return "found tags : " followed by one tab-indented tag name per line
	 */
	public String checkResp()
	{
		StringBuilder rez = new StringBuilder("found tags : \n");
		for (String key : checkTags.keySet()) {
			if (checkTags.get(key)) { // only the tags actually seen
				rez.append("\t").append(key).append("\n");
			}
		}
		return rez.toString();
	}

	/**
	 * Sets the correspondences used to convert word attributes.
	 *
	 * @param correspRef word attribute name -> respStmt id
	 * @param correspType word attribute name -> ana type
	 */
	public void setCorrespondances(correspRef, correspType)
	{
		this.correspRef = correspRef;
		this.correspType = correspType;
	}

	/**
	 * Sets the informations written in the teiHeader.
	 *
	 * @param respId the respStmt ids
	 * @param resps respStmt id -> [resp text, person name, date "when" value, date text]
	 * @param applications respStmt id -> [ident, version, report file or null]
	 * @param taxonomies respStmt id -> tagset targets
	 * @param items taxonomy id -> (ref type -> ref target)
	 */
	public void setHeaderInfos(respId, resps, applications, taxonomies, items)
	{
		this.respId = respId
		this.resps = resps
		this.applications = applications
		this.taxonomies = taxonomies;
		this.items = items;
	}

	/**
	 * Writes one respStmt element per declared respStmt id:
	 * {@code <respStmt><resp id="ref">text<date when="...">...</date></resp><name type="person">...</name></respStmt>}.
	 */
	public void writeTXMResps()
	{
		for (String ref : respId) {
			String[] infos = resps.get(ref);

			writer.writeStartElement("respStmt");

			writer.writeStartElement(RESP);
			writer.writeAttribute(ID, ref);
			writer.writeCharacters(infos[0]);
			writer.writeStartElement("date");
			writer.writeAttribute("when", infos[2]);
			writer.writeCharacters(infos[3]);
			writer.writeEndElement(); // date
			writer.writeEndElement(); // resp

			writer.writeStartElement("name");
			writer.writeAttribute(TYPE, "person");
			writer.writeCharacters(infos[1])
			writer.writeEndElement(); // name

			writer.writeEndElement(); // respStmt
		}
	}

	/**
	 * Writes one txm:application element per declared respStmt id. When a report
	 * file is given, its non empty lines are copied raw into the element, followed
	 * by the list of the tagsets used by the application.
	 */
	public void writeTXMApps()
	{
		for (String ref : respId) {
			List infos = applications.get(ref);
			String ident = infos.get(0);
			String version = infos.get(1);

			// the report may be given as a File or as a path
			def reportEntry = infos.get(2);
			File report = null;
			if (reportEntry instanceof File) {
				report = reportEntry;
			} else if (reportEntry != null) {
				report = new File(reportEntry.toString());
			}

			writer.writeStartElement(TXMNS, "application");
			writer.writeAttribute("ident", ident);
			writer.writeAttribute("version", version);
			writer.writeAttribute(RESP, ref);

			// get txm:commandLine from GeneratedReport
			if (report != null) {
				// empty characters + flush: the start tag is completed and flushed
				// before the raw lines are written straight to the output
				writer.writeCharacters("");
				writer.flush();

				BufferedReader reader = new BufferedReader(new FileReader(report));
				try {
					String line = reader.readLine();
					while (line != null) {
						if (line.length() != 0) {
							output.write(line+"\n");
						}
						line = reader.readLine();
					}
				} finally {
					reader.close(); // also released when the copy fails
				}
			}

			writer.writeStartElement("ab");
			writer.writeAttribute(TYPE, "annotation");
			for (String item : taxonomies.get(ref)) {
				writer.writeStartElement("list");
				writer.writeEmptyElement("ref");
				writer.writeAttribute(TYPE, "tagset");
				writer.writeAttribute("target", item);
				writer.writeEndElement(); // list
			}
			writer.writeEndElement(); // ab

			writer.writeEndElement(); // txm:application
		}
	}

	/**
	 * Writes one taxonomy element per entry of items:
	 * {@code <taxonomy id="tax"><bibl type="tagset"><title>tax</title><ref type="..." target="..."/>...</bibl></taxonomy>}.
	 * Nothing is written when no items were set.
	 */
	public void writeTXMTaxonomies()
	{
		if (items == null) {
			return;
		}

		for (String tax : items.keySet()) {
			HashMap<String,String> refs = items.get(tax);

			writer.writeStartElement("taxonomy");
			writer.writeAttribute(ID, tax);

			writer.writeStartElement("bibl");
			writer.writeAttribute(TYPE, "tagset");
			writer.writeStartElement("title");
			writer.writeCharacters(tax);
			writer.writeEndElement(); // title

			for (String type : refs.keySet()) {
				writer.writeEmptyElement("ref");
				writer.writeAttribute(TYPE, type);
				writer.writeAttribute("target", refs.get(type));
			}

			writer.writeEndElement(); // bibl
			writer.writeEndElement(); // taxonomy
		}
	}

	/**
	 * Sample run: converts $HOME/xml/rgaqcj/src/roland-p5.xml into
	 * $HOME/xml/rgaqcj/anainline/roland-p5.xml.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args) {

		// "~" is not expanded by the JVM: resolve the home directory explicitly
		File rootDir = new File(System.getProperty("user.home"), "xml/rgaqcj");
		File outDir = new File(rootDir, "anainline");
		outDir.mkdirs();

		String file = "roland-p5.xml";
		String anafile = "roland-p5.xml";

		// correspType (word attribute, type attribute of the ana property of the txm w)
		def correspType = new HashMap<String,String>()
		correspType.put("p2", "CATTEX2009");

		// correspRef (word attribute, ref attribute of the ana property of the txm w;
		// ref points to the id of a respStmt of the teiHeader)
		def correspRef = new HashMap<String,String>()
		correspRef.put("p2", "ctx1");

		// the ids of all the respStmt must be listed
		def respId = ["ctx1"];//,"TT1", "TnT1"];

		// maps each respId to the application and its execution report
		def applications = new HashMap<String, List>();
		applications.put("ctx1", new ArrayList());
		applications.get("ctx1").add("Oxygen"); // app ident
		applications.get("ctx1").add("9.3"); // app version
		applications.get("ctx1").add(null); // app report file path

		// maps each respId to the type attributes of the ana property of the txm w,
		// to build the refs to the taxonomies
		def taxonomiesUtilisees = new HashMap<String,String[]>();
		taxonomiesUtilisees.put("ctx1", ["CATTEX2009"]);//,"lemma","lasla","grace"]);

		// associates an item id with its description and its URI
		def itemsURI = new HashMap<String,HashMap<String,String>>();
		itemsURI.put("CATTEX2009", new HashMap<String,String>());
		itemsURI.get("CATTEX2009").put("tagset", "http://bfm.ens-lsh.fr/IMG/xml/cattex2009.xml");
		itemsURI.get("CATTEX2009").put("website", "http://bfm.ens-lsh.fr/article.php3?id_article=176");

		// respStmt informations
		// resps (respId <see above>, [description, person, date, date text])
		def resps = new HashMap<String,String[]>();
		resps.put("ctx1", ["initial tagging", "alavrentiev", "2010-03-02", "Tue Mar  2 21:02:55 Paris, Madrid 2010"])

		// run the conversion
		def builder = new Xml2Ana(new File(new File(rootDir, "src"), file));
		builder.setCorrespondances(correspRef, correspType);
		builder.setHeaderInfos(respId, resps, applications, taxonomiesUtilisees, itemsURI)
		// output directory + output file name
		// NOTE(review): assumes the inherited process() takes the output File - confirm against StaxIdentityParser
		builder.process(new File(outDir, anafile));
	}
}