Statistics
| Revision:

ccc / projets / CMC2ELAN / src / StaxIdentityParser.groovy @ 2

History | View | Annotate | Download (10.6 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun., 06 mai 2013) $
25
// $LastChangedRevision: 2386 $
26
// $LastChangedBy: mdecorde $
27
//
28

    
29
import java.io.File;
30
import javax.xml.stream.XMLStreamConstants;
31
import javax.xml.stream.XMLOutputFactory;
32
import javax.xml.stream.XMLStreamWriter;
33
import java.io.File;
34
import java.net.URL;
35
import javax.xml.stream.*;
36
import org.txm.importer.filters.*;
37

    
38
/**
 * StAX "identity" transformer: reads an XML document event by event with an
 * {@link XMLStreamReader} and re-emits every event on an
 * {@link XMLStreamWriter}.
 *
 * Each event type is handled by its own protected <code>processXxx()</code>
 * method so that subclasses can override only what they need to change.
 * <code>xi:include</code> elements are expanded in place by streaming the
 * referenced file through the same writer (see {@link #processXInclude()}).
 *
 * NOTE(review): the XMLInputFactory is used with its default configuration
 * (DTD / external entities as configured by the JDK). If inputs can be
 * untrusted, consider disabling them - not changed here because it would
 * alter the events that are copied.
 */
class StaxIdentityParser {
    /** URL of the document being read. */
    protected URL inputurl;
    /** Stream opened on {@link #inputurl}; closed when processing ends. */
    protected def inputData;
    /** Factory used for the main reader and for XInclude readers. */
    protected XMLInputFactory factory;
    /** Reader currently being consumed (temporarily swapped during XInclude). */
    protected XMLStreamReader parser;

    /** Factory used by process(File) to build the writer. */
    protected XMLOutputFactory outfactory = XMLOutputFactory.newInstance();
    /** Character stream behind {@link #writer}; only set by process(File). */
    protected Writer output;
    /** Writer receiving the copied events. */
    protected XMLStreamWriter writer;

    public static String TXMNS = "http://textometrie.org/1.0";
    public static String TEINS = "http://www.tei-c.org/ns/1.0";
    /** Namespace context installed on writers created by this class. */
    protected static PersonalNamespaceContext Nscontext = new PersonalNamespaceContext();

    /** Slash-terminated path of local names from the root to the current element. */
    protected String currentXPath = "/"
    /** Local name of the element whose start/end event is being handled. */
    protected String localname;
    /** Nesting depth of XInclude processing; 0 while reading the main document. */
    int processingXInclude = 0;

    /** Local name of the XInclude element. */
    public static final String INCLUDE = "include"
    /** Prefix expected on the XInclude element. */
    public static final String XI = "xi"

    /**
     * Opens the given file for reading.
     *
     * @param infile the XML file to read
     */
    public StaxIdentityParser(File infile)
    {
        this(infile.toURI().toURL());
    }

    /**
     * Opens a stream on the given URL and creates the reader on it.
     *
     * @param inputurl location of the XML document to read
     */
    public StaxIdentityParser(URL inputurl)
    {
        this.inputurl = inputurl;
        inputData = inputurl.openStream();
        factory = XMLInputFactory.newInstance();
        try {
            parser = factory.createXMLStreamReader(inputData);
        } catch (Exception e) {
            // the stream was already opened: do not leak it when the reader cannot be built
            closeInput();
            throw e;
        }
    }

    /** Hook called once before the main document is read; does nothing by default. */
    protected void before()
    {

    }

    /**
     * Hook called once after the main document has been copied successfully:
     * closes the reader, the writer and the input stream.
     */
    protected void after()
    {
        factory = null;
        try {
            if (parser != null) parser.close();
            if (writer != null) writer.close();
        } finally {
            parser = null;
            // XMLStreamReader.close() does not close the underlying stream
            closeInput();
        }
    }

    /** Releases the reader and its input stream; safe to call when already released. */
    protected void closeForError() {
        try {
            if (parser != null) parser.close();
        } finally {
            closeInput();
        }
    }

    /** Closes the stream opened on {@link #inputurl}, reporting (not throwing) failures. */
    private void closeInput() {
        closeQuietly(inputData, "input of "+inputurl);
    }

    /** Closes anything exposing close(); a failure is printed instead of being propagated. */
    private static void closeQuietly(def resource, String label) {
        if (resource == null) return;
        try {
            resource.close();
        } catch (Exception e) {
            println "Warning: could not close "+label+": "+e
        }
    }

    /** Best-effort release of writer, output and reader after a processing failure. */
    private void releaseAfterFailure() {
        closeQuietly(writer, "XML writer");
        closeQuietly(output, "output");
        closeQuietly(parser, "XML parser");
        if (processingXInclude == 0) {
            // the main input stream belongs to the top-level run only
            closeInput();
        }
    }

    /**
     * Creates the output: closes any previous writer/output, then opens
     * <code>f</code> as UTF-8 and builds the XML writer on it.
     *
     * @param f the file to write
     * @return true, if successful
     */
    private boolean createOutput(File f) {
        try {
            if (writer != null) // a previous run left a writer open
                writer.close();
            if (output != null)
                output.close();

            output = new OutputStreamWriter(new FileOutputStream(f), "UTF-8");
            writer = outfactory.createXMLStreamWriter(output);//create a new file
            writer.setNamespaceContext(Nscontext);
            return true;
        } catch (Exception e) {
            System.out.println("Error: create output of "+f+": "+e);
            // do not keep a half-initialised output file open
            if (output != null) {
                try { output.close() } catch (Exception ignored) { /* already reporting the primary error */ }
            }
            return false;
        }
    }

    /**
     * Copies the input document to <code>outfile</code> (UTF-8, XML 1.0).
     * All resources are released before returning, whatever the outcome.
     *
     * @param outfile the file to create
     * @return true if the whole document was copied, false otherwise
     */
    public boolean process(File outfile)
    {
        if (!createOutput(outfile))
            return false;
        boolean ret = false;
        try {
            writer.writeStartDocument("UTF-8", "1.0");
            writer.writeCharacters("\n");
            ret = process(writer);
            // on the normal path a failing flush/close must reach the caller
            if (writer != null) writer.close();
            if (output != null) output.close();
        } finally {
            // no-ops when already closed; guarantees release when something above threw
            closeQuietly(writer, "XML writer");
            closeQuietly(output, "output file "+outfile);
            if (parser != null)
                try {parser.close()} catch(Exception e){println "parser exep: "+e}
            closeInput();
        }
        return ret;
    }

    /**
     * Copies every event of the current reader to <code>awriter</code>.
     *
     * before() and after() are only called for the main document, not while
     * an XInclude is being expanded. The loop stops as soon as END_DOCUMENT
     * is read, so processEndDocument() is never dispatched from here.
     *
     * @param awriter the writer receiving the events
     * @return true on success; false if an exception occurred (it is printed
     *         and writer, output and reader are closed)
     */
    public boolean process(XMLStreamWriter awriter)
    {
        this.writer = awriter;
        if (processingXInclude == 0) {
            before(); // if you need to do something before reading the xml
        }
        try {
            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                switch (event) {
                    case XMLStreamConstants.NAMESPACE:
                        processNamespace();
                        break;
                    case XMLStreamConstants.START_ELEMENT:
                        localname = parser.getLocalName();
                        currentXPath += localname+"/"
                        processStartElement();
                        break;
                    case XMLStreamConstants.CHARACTERS:
                        processCharacters();
                        break;
                    case XMLStreamConstants.PROCESSING_INSTRUCTION:
                        processProcessingInstruction();
                        break;
                    case XMLStreamConstants.DTD:
                        processDTD();
                        break;
                    case XMLStreamConstants.CDATA:
                        processCDATA();
                        break;
                    case XMLStreamConstants.COMMENT:
                        processComment();
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        localname = parser.getLocalName();
                        processEndElement();
                        // drop "<localname>/" from the end of the path
                        currentXPath = currentXPath.substring(0, currentXPath.length() - localname.length() -1)
                        break;
                    case XMLStreamConstants.ENTITY_REFERENCE:
                        processEntityReference();
                        break;
                }
            }
        } catch(Exception e) {
            println("Unexpected error while parsing file "+inputurl+" : "+e);
            if (parser != null) {
                println("Location line: "+parser.getLocation().getLineNumber()+" character: "+parser.getLocation().getColumnNumber());
            }
            e.printStackTrace();
            releaseAfterFailure();
            return false;
        }
        if (processingXInclude == 0) {
            after(); // if you need to do something before closing the parser();
        }
        return true;
    }

    /**
     * Copies events unchanged until the end tag matching an element whose
     * start tag has already been written. Unlike process(), element starts
     * are copied by a private routine, so subclass overrides of
     * processStartElement()/processEndElement() are not applied here.
     *
     * On error the problem is printed, output and reader are closed and the
     * method returns normally.
     *
     * @param tagname local name of the element to finish
     */
    public void goToEnd(String tagname) {
        def elements = 1; // depth relative to the already opened element
        try {
            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                switch (event) {
                    case XMLStreamConstants.NAMESPACE:
                        processNamespace();
                        break;
                    case XMLStreamConstants.START_ELEMENT:
                        elements++;
                        localname = parser.getLocalName();
                        currentXPath += localname+"/"
                        _processStartElement();
                        break;
                    case XMLStreamConstants.CHARACTERS:
                        processCharacters();
                        break;
                    case XMLStreamConstants.PROCESSING_INSTRUCTION:
                        processProcessingInstruction();
                        break;
                    case XMLStreamConstants.DTD:
                        processDTD();
                        break;
                    case XMLStreamConstants.CDATA:
                        processCDATA();
                        break;
                    case XMLStreamConstants.COMMENT:
                        processComment();
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        elements--;
                        localname = parser.getLocalName();
                        currentXPath = currentXPath.substring(0, currentXPath.length() - localname.length() -1)
                        // the xi:include start tag is never written, so neither is its end tag
                        if (!(INCLUDE == localname && XI == parser.getPrefix())) {
                            writer.writeEndElement();
                        }
                        if (elements == 0 && localname == tagname)
                            return;
                        break;
                    case XMLStreamConstants.ENTITY_REFERENCE:
                        processEntityReference();
                        break;
                }
            }
        } catch(Exception e) {
            println("Error while parsing file "+inputurl);
            if (parser != null) {
                println("Location "+parser.getLocation());
            }
            org.txm.utils.logger.Log.printStackTrace(e);
            // output is null when the writer was supplied by the caller
            closeQuietly(output, "output");
            closeQuietly(parser, "XML parser");
            return;
        }
    }

    /**
     * @return "Line: N Col: M" for the reader's current position, or null
     *         once the reader has been released
     */
    public def getLocation() {
        if (parser != null)
            return "Line: "+parser.getLocation().getLineNumber()+" Col: "+parser.getLocation().getColumnNumber()
        return null;
    }

    /** Copies a NAMESPACE event. */
    protected void processNamespace() {
        writer.writeNamespace(parser.getPrefix(), parser.getNamespaceURI());
    }

    /**
     * Copies the current start tag with its namespace declarations and
     * attributes, or expands it when it is an <code>xi:include</code>.
     */
    protected void processStartElement()
    {
        _processStartElement();
    }

    /** Non-overridable implementation of the start tag copy, shared with goToEnd(). */
    private void _processStartElement()
    {
        String prefix = parser.getPrefix();
        if (INCLUDE == localname && XI == prefix) {
            processXInclude();
        } else {
            // NOTE(review): the namespace URI is resolved through Nscontext, not
            // through the parser - presumably only prefixes known to that context
            // are supported; confirm against PersonalNamespaceContext.
            if (prefix != null && prefix.length() > 0)
                writer.writeStartElement(Nscontext.getNamespaceURI(prefix), localname)
            else
                writer.writeStartElement(localname);

            for (int i = 0 ; i < parser.getNamespaceCount() ; i++) {
                writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
            }

            writeAttributes();
        }
    }

    /** Copies the attributes of the current start tag, keeping their prefixes. */
    protected void writeAttributes() {
        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
            String attrPrefix = parser.getAttributePrefix(i);
            if (attrPrefix != null && attrPrefix.length() > 0)
                writer.writeAttribute(attrPrefix+":"+parser.getAttributeLocalName(i), parser.getAttributeValue(i));
            else
                writer.writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i));
        }
    }

    /** Copies a CHARACTERS event. */
    protected void processCharacters()
    {
        writer.writeCharacters(parser.getText());
    }

    /** Copies a PROCESSING_INSTRUCTION event. */
    protected void processProcessingInstruction()
    {
        writer.writeProcessingInstruction(parser.getPITarget(), parser.getPIData());
    }

    /** Copies a DTD event. */
    protected void processDTD()
    {
        writer.writeDTD(parser.getText());
    }

    /** Copies a CDATA event. */
    protected void processCDATA()
    {
        writer.writeCData(parser.getText())
    }

    /** Copies a COMMENT event. */
    protected void processComment()
    {
        writer.writeComment(parser.getText());
    }

    /** Writes the end tag of the current element, except for <code>xi:include</code>. */
    protected void processEndElement()
    {
        if (INCLUDE == localname && XI == parser.getPrefix()) {
            // nothing: no start tag was written for xi:include
        } else {
            writer.writeEndElement();
        }
    }

    /**
     * Writes the end of the document. Not called by process() or goToEnd(),
     * whose loops stop before dispatching END_DOCUMENT; kept for subclasses.
     */
    protected void processEndDocument() {
        writer.writeEndDocument();
    }

    /** Copies an ENTITY_REFERENCE event. */
    protected void processEntityReference() {
        writer.writeEntityRef(parser.getLocalName());
    }

    /** Maps a URL to a local file, decoding percent-escapes for well-formed file: URLs. */
    private static File localFileOf(URL location) {
        if ("file" == location.getProtocol()) {
            try {
                return new File(location.toURI());
            } catch (Exception ignored) {
                // URL is not a strict file URI: fall back to the raw path below
            }
        }
        return new File(location.getFile());
    }

    /**
     * Process the XInclude elements: the <code>href</code> attribute is
     * resolved relative to the directory of the main document and, when the
     * file exists, its content is streamed through the current writer.
     * A missing href or missing file only produces a warning.
     *
     * @throws XMLStreamException if the included document could not be copied
     */
    protected void processXInclude() {
        String url = parser.getAttributeValue(null, "href"); // relative only
        if (url == null) {
            println "Warning: xi:include element without href attribute in $inputurl"
            return;
        }
        File driver = localFileOf(inputurl);
        File ref = new File(driver.getParent(), url);
        if (ref.exists()) {
            URL includeurl = ref.toURI().toURL();
            println "process xi include: "+ref
            def parserSave = this.parser // back up current parser
            def xiIncludeInputData = includeurl.openStream();
            boolean copied = false;
            try {
                parser = factory.createXMLStreamReader(xiIncludeInputData);
                processingXInclude++ // to avoid recalling before & after methods
                try {
                    copied = this.process(writer);
                } finally {
                    processingXInclude-- // end of XInclude processing
                }
            } finally {
                // release the nested reader and stream, then restore the main reader
                if (parser != null && !parser.is(parserSave)) {
                    closeQuietly(parser, "XInclude parser of "+ref);
                }
                this.parser = parserSave
                closeQuietly(xiIncludeInputData, "XInclude input "+ref);
            }
            if (!copied) {
                // the nested failure already closed the writer: stop instead of writing to it
                throw new XMLStreamException("XInclude processing failed for "+ref);
            }
        } else {
            println "Warning referenced file: $ref does not exists"
        }
    }

    /**
     * Development entry point: merges the XIncludes of a master file.
     *
     * @param args optional: [0] input file, [1] output file; the historical
     *             hard-coded paths are used when absent
     */
    public static void main(String[] args)
    {
        String inPath = (args != null && args.length > 0) ? args[0] : "/home/mdecorde/xml/xiinclude/master.xml"
        String outPath = (args != null && args.length > 1) ? args[1] : "/home/mdecorde/xml/xiinclude/merged.xml"
        File input = new File(inPath)
        File output = new File(outPath)
        if (!(input.exists() && input.canRead())) {
            println "cannot found $input";
            return;
        }

        def identity = new StaxIdentityParser(input);
        if (identity.process(output)) {
            println "Done: $output"
        } else {
            println "Failed to process $input"
        }
    }
}