Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / StaxIdentityParser.groovy @ 479

History | View | Annotate | Download (10.2 kB)

1
package org.txm.importer
2

    
3
import groovy.transform.CompileStatic
4
import java.io.File;
5
import javax.xml.stream.XMLStreamConstants;
6
import javax.xml.stream.XMLOutputFactory;
7
import javax.xml.stream.XMLStreamWriter;
8
import java.io.File;
9
import java.net.URL;
10
import javax.xml.stream.*;
11
import org.txm.importer.filters.*;
12
import org.txm.importer.graal.PersonalNamespaceContext;
13

    
14
@CompileStatic
15
class StaxIdentityParser {
16
        // ---- input side ----

        /** URL of the document being read; also used in error messages. */
        protected URL inputurl;
        /** Stream opened on {@link #inputurl}. */
        protected InputStream inputData;
        /** Factory used to create {@link #parser}. */
        protected XMLInputFactory factory;
        /** StAX reader whose events are copied to {@link #writer}. */
        protected XMLStreamReader parser;

        // ---- output side ----

        /** Factory used to create {@link #writer} when writing to a file. */
        protected XMLOutputFactory outfactory = XMLOutputFactory.newInstance();
        /** Buffered file stream under {@link #writer}; stays null when the caller supplies its own writer. */
        protected BufferedOutputStream output;
        /** StAX writer receiving the copied events. */
        protected XMLStreamWriter writer;

        // ---- namespaces (presumably the TXM and TEI namespace URIs and their usual prefixes) ----

        public static String TXMNS = "http://textometrie.org/1.0";
        public static String TXM = "txm";
        public static String TEINS = "http://www.tei-c.org/ns/1.0";
        public static String TEI = "tei";
        /** Namespace context installed on file writers and used to resolve element prefixes. */
        protected static PersonalNamespaceContext Nscontext = new PersonalNamespaceContext();

        // ---- parsing state ----

        //protected StringBuilder currentXPath = new StringBuilder("/")
        /** Local name of the element whose start or end tag is currently being processed. */
        protected String localname;
        /** Nesting counter incremented and decremented around XInclude processing. */
        int processingXInclude = 0;
36
        
37
        /**
         * Builds a parser reading the given file.
         *
         * @param infile the XML file to read; it is converted to a URL and passed to the URL constructor
         */
        public StaxIdentityParser(File infile) {
                this(infile.toURI().toURL());
        }

        /**
         * Opens the given URL and creates the StAX reader over its stream.
         *
         * @param inputurl location of the XML document to read
         * @throws IOException if the URL cannot be opened
         * @throws XMLStreamException if the reader cannot be created; the stream opened
         *         on the URL is closed before the exception is rethrown
         */
        public StaxIdentityParser(URL inputurl) {
                this.inputurl = inputurl;
                // create the factory first so that nothing is open yet if this step fails
                this.factory = XMLInputFactory.newInstance();
                this.inputData = inputurl.openStream();
                try {
                        this.parser = factory.createXMLStreamReader(inputData);
                } catch (XMLStreamException e) {
                        // do not leak the stream when the reader cannot be created
                        try {
                                inputData.close();
                        } catch (IOException ignored) {
                                // the reader failure is the error worth reporting
                        }
                        inputData = null;
                        throw e;
                }
        }
47
        
48
        /**
         * Hook invoked by {@code process(XMLStreamWriter)} before the first event is read.
         * Does nothing here; subclasses may override it.
         */
        protected void before() {
                // intentionally empty
        }
51
        
52
        /**
         * Hook invoked by {@code process(XMLStreamWriter)} once all events were copied.
         * Closes the reader, flushes and closes the writer, closes the input stream,
         * then drops the factory, writer and parser references.
         */
        protected void after() {
                factory = null;
                if (parser != null) {
                        parser.close();
                }
                if (writer != null) {
                        // flush inside the null check: the flush used to run unguarded and
                        // threw a NullPointerException when no writer was set
                        writer.flush();
                        writer.close();
                }
                if (inputData != null) {
                        inputData.close();
                }
                writer = null;
                parser = null;
        }
61
        
62
        /**
         * Closes the reader after a failure.
         * Safe to call when the reader was already released: {@code after()} sets
         * {@code parser} to null, which used to make this method throw.
         */
        protected void closeForError() {
                if (parser != null) {
                        parser.close();
                }
        }
65
        
66
        /**
         * Creates the output: closes any previous writer and stream, then opens a
         * buffered UTF-8 StAX writer on the given file and installs the shared
         * namespace context on it.
         *
         * @param f the file to write
         * @return true if successful; false if anything failed (the error is printed)
         */
        private boolean createOutput(File f) {
                try {
                        if (writer != null) { // left over from a previous file
                                writer.close();
                        }
                        if (output != null) { // left over from a previous file
                                output.close();
                        }

                        output = new BufferedOutputStream(new FileOutputStream(f), 16*1024);
                        writer = outfactory.createXMLStreamWriter(output, "UTF-8"); // create a new file
                        writer.setNamespaceContext(Nscontext);
                        return true;
                } catch (Exception e) {
                        System.out.println("Error: create output of "+f+": "+e);
                        // release the file handle: the caller gives up as soon as false is returned
                        if (output != null) {
                                try {
                                        output.close();
                                } catch (IOException ignored) {
                                        // the failure printed above is the one that matters
                                }
                                output = null;
                        }
                        writer = null;
                        return false;
                }
        }
89
        
90
        /**
         * Copies the input document to the given file.
         * Writes the XML declaration and a newline, delegates the event copy to
         * {@code process(XMLStreamWriter)}, then releases the writer, the file stream,
         * the input stream and the reader.
         *
         * @param outfile the file to create
         * @return false if the output could not be created, if the copy failed or if
         *         closing the reader failed; true otherwise
         */
        public boolean process(File outfile)
        {
                if (!createOutput(outfile)) {
                        return false;
                }

                boolean ret = false;
                try {
                        writer.writeStartDocument("UTF-8", "1.0");
                        writer.writeCharacters("\n");
                        ret = process(writer);
                        if (writer != null) { // still set when the copy failed
                                writer.close();
                        }
                } finally {
                        // always release the file handle, even when writing the declaration threw
                        if (output != null) {
                                output.close();
                        }
                        // the failure path of the copy does not close the source stream
                        if (inputData != null) {
                                try {
                                        inputData.close();
                                } catch (IOException ignored) {
                                        // nothing more can be done with a source that refuses to close
                                }
                        }
                }

                if (parser != null) {
                        try {
                                parser.close();
                        } catch (Exception e) {
                                println "parser exep: "+e;
                                return false;
                        }
                }
                return ret;
        }
111
        
112
        public final static String SLASH = "/"

        /**
         * Copies the events of the current reader to the given writer, dispatching
         * each event type to its {@code process*} hook. {@code before()} runs first
         * and {@code after()} runs once the loop ends normally; both run on every
         * call, nested calls included.
         *
         * @param awriter the writer to use; it becomes the {@code writer} field
         * @return true when the whole document was copied; false when an exception
         *         occurred (it is printed and logged, and writer, file stream,
         *         reader and input stream are closed)
         */
        public boolean process(XMLStreamWriter awriter)
        {
                this.writer = awriter;
                before(); // if you need to do something before reading the xml
                try {
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                switch (event) {
                                        case XMLStreamConstants.NAMESPACE:
                                                processNamespace();
                                                break;
                                        case XMLStreamConstants.START_ELEMENT:
                                                localname = parser.getLocalName();
                                                processStartElement();
                                                break;
                                        case XMLStreamConstants.CHARACTERS:
                                                processCharacters();
                                                break;
                                        case XMLStreamConstants.END_ELEMENT:
                                                localname = parser.getLocalName();
                                                processEndElement();
                                                break;
                                        case XMLStreamConstants.PROCESSING_INSTRUCTION:
                                                processProcessingInstruction();
                                                break;
                                        case XMLStreamConstants.DTD:
                                                processDTD();
                                                break;
                                        case XMLStreamConstants.CDATA:
                                                processCDATA();
                                                break;
                                        case XMLStreamConstants.COMMENT:
                                                processComment();
                                                break;
                                        case XMLStreamConstants.END_DOCUMENT:
                                                // NOTE(review): never reached, the loop condition stops on
                                                // END_DOCUMENT before dispatching it, so processEndDocument()
                                                // is not called from here. Kept as is: confirm intent.
                                                processEndDocument();
                                                break;
                                        case XMLStreamConstants.ENTITY_REFERENCE:
                                                processEntityReference();
                                                break;
                                }
                        }
                } catch(Exception e) {
                        println("Unexpected error while parsing file "+inputurl+" : "+e);
                        // parser is null when this method is called again after a completed run
                        if (parser != null) {
                                println("Location line: "+parser.getLocation().getLineNumber()+" character: "+parser.getLocation().getColumnNumber());
                        }
                        org.txm.utils.logger.Log.printStackTrace(e);
                        if (writer != null) writer.close();
                        if (output != null) output.close();
                        if (parser != null) parser.close();
                        // after() is skipped on this path, so release the source stream here
                        if (inputData != null) inputData.close();
                        return false;
                }
                after(); // if you need to do something before closing the parser();
                return true;
        }
173
        
174
        /**
         * Copies events to the writer up to and including the end tag that balances
         * an element whose start tag has already been written by the caller.
         * Nested start tags go through the private {@code _processStartElement()} and
         * end tags are written directly, so the overridable
         * {@code processStartElement()} / {@code processEndElement()} hooks are not
         * invoked for this subtree.
         * On error the exception is printed and logged, and the file stream and the
         * reader are closed.
         *
         * @param tagname local name of the element to finish
         */
        public void goToEnd(String tagname) {
                int depth = 1; // the caller already opened one element
                try {
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                switch (event) {
                                        case XMLStreamConstants.NAMESPACE:
                                                processNamespace();
                                                break;
                                        case XMLStreamConstants.START_ELEMENT:
                                                depth++;
                                                localname = parser.getLocalName();
                                                _processStartElement();
                                                break;
                                        case XMLStreamConstants.CHARACTERS:
                                                processCharacters();
                                                break;
                                        case XMLStreamConstants.PROCESSING_INSTRUCTION:
                                                processProcessingInstruction();
                                                break;
                                        case XMLStreamConstants.DTD:
                                                processDTD();
                                                break;
                                        case XMLStreamConstants.CDATA:
                                                processCDATA();
                                                break;
                                        case XMLStreamConstants.COMMENT:
                                                processComment();
                                                break;
                                        case XMLStreamConstants.END_ELEMENT:
                                                depth--;
                                                localname = parser.getLocalName();
                                                writer.writeEndElement();
                                                if (depth == 0 && localname == tagname)
                                                        return;
                                                break;
                                        case XMLStreamConstants.END_DOCUMENT:
                                                // never reached: the loop condition stops on END_DOCUMENT
                                                processEndDocument();
                                                break;
                                        case XMLStreamConstants.ENTITY_REFERENCE:
                                                processEntityReference();
                                                break;
                                }
                        }
                } catch(Exception e) {
                        println("Error while parsing file "+inputurl);
                        if (parser != null) {
                                println("Location "+parser.getLocation());
                        }
                        org.txm.utils.logger.Log.printStackTrace(e);
                        // output is null when the writer was supplied by the caller
                        if (output != null) output.close();
                        if (parser != null) parser.close();
                        return;
                }
        }
234
        
235
        /**
         * Describes the current position of the reader.
         *
         * @return a string of the form {@code "Line: <n> Col: <n>"}, or null when there is no reader
         */
        public def getLocation() {
                if (parser == null) {
                        return null;
                }
                Location where = parser.getLocation();
                return "Line: "+where.getLineNumber()+" Col: "+where.getColumnNumber()
        }
240
        
241
        /** Writes the namespace (prefix and URI) reported by the reader for the current event. */
        protected void processNamespace() {
                String nsPrefix = parser.getPrefix();
                String nsUri = parser.getNamespaceURI();
                writer.writeNamespace(nsPrefix, nsUri);
        }
244
        
245
        public static final String INCLUDE = "include"
        public static final String XI = "xi"

        /**
         * Writes the start tag of the current element: its name (resolved through
         * the shared namespace context when it has a prefix), the namespaces it
         * declares and its attributes.
         * The work is done by the private {@code _processStartElement()}, which holds
         * the very same statements; subclasses override this method to customise
         * start tags.
         */
        protected void processStartElement()
        {
                _processStartElement();
        }
266
        
267
        /**
         * Non-overridable start-tag writer: writes the element name, then the
         * namespaces declared on the element, then its attributes.
         * A prefixed element is written with the URI that the shared namespace
         * context gives for its prefix; an unprefixed one with its local name only.
         */
        private void _processStartElement()
        {
                String elementPrefix = parser.getPrefix();
                boolean prefixed = elementPrefix != null && elementPrefix.length() > 0;
                if (prefixed) {
                        String uri = Nscontext.getNamespaceURI(elementPrefix);
                        writer.writeStartElement(uri, localname);
                } else {
                        writer.writeStartElement(localname);
                }

                int declared = parser.getNamespaceCount();
                for (int n = 0 ; n < declared ; n++) {
                        writer.writeNamespace(parser.getNamespacePrefix(n), parser.getNamespaceURI(n));
                }

                writeAttributes();
        }
285
        
286
        /** Writes every attribute of the current element, in reader order, through {@code writeAttribute}. */
        protected void writeAttributes() {
                int total = parser.getAttributeCount();
                int idx = 0;
                while (idx < total) {
                        String attPrefix = parser.getAttributePrefix(idx);
                        String attName = parser.getAttributeLocalName(idx);
                        String attValue = parser.getAttributeValue(idx);
                        writeAttribute(attPrefix, attName, attValue);
                        idx++;
                }
        }
291
        
292
        /**
         * Writes one attribute on the element being opened.
         *
         * @param prefix attribute prefix; null or empty when the attribute has none
         * @param name attribute local name
         * @param value attribute value
         */
        protected void writeAttribute(String prefix, String name, String value) {
                // a prefixed attribute is written under its qualified name "prefix:name"
                String written = (prefix == null || prefix.length() == 0) ? name : prefix+":"+name;
                writer.writeAttribute(written, value);
        }
298
        
299
        /** Copies the text of the current CHARACTERS event to the writer. */
        protected void processCharacters() {
                String text = parser.getText();
                writer.writeCharacters(text);
        }
303
        
304
        /**
         * Copies the current processing instruction to the writer.
         * A processing instruction without data is written with its target only,
         * because the reader may report null data and the two-argument writer call
         * may reject it.
         */
        protected void processProcessingInstruction()
        {
                String target = parser.getPITarget();
                String data = parser.getPIData();
                if (data == null) {
                        writer.writeProcessingInstruction(target);
                } else {
                        writer.writeProcessingInstruction(target, data);
                }
        }
308
        
309
        /** Copies the text of the current DTD event to the writer. */
        protected void processDTD() {
                String dtd = parser.getText();
                writer.writeDTD(dtd);
        }
313
        
314
        /** Copies the content of the current CDATA event to the writer as a CDATA section. */
        protected void processCDATA() {
                String cdata = parser.getText();
                writer.writeCData(cdata);
        }
318
        
319
        /** Copies the text of the current COMMENT event to the writer as a comment. */
        protected void processComment() {
                String comment = parser.getText();
                writer.writeComment(comment);
        }
323
        
324
        /**
         * Writes the end tag of the element currently open in the writer.
         * The special handling of xi:include end tags is disabled, so every end
         * tag is written.
         */
        protected void processEndElement() {
                // disabled: skip the end tag when (localname == INCLUDE && parser.getPrefix() == XI)
                writer.writeEndElement();
        }
332
        
333
        /** Asks the writer to end the document. */
        protected void processEndDocument()
        {
                writer.writeEndDocument();
        }
336
        
337
        /** Writes an entity reference named after the local name the reader reports for the current event. */
        protected void processEntityReference()
        {
                String entityName = parser.getLocalName();
                writer.writeEntityRef(entityName);
        }
340
        /**
         * Process the XInclude elements: resolves the {@code href} attribute of the
         * current element against the directory of the input document and, when the
         * referenced file exists, copies its events to the current writer.
         * A missing {@code href} or a missing file only prints a warning.
         *
         * NOTE(review): the nested {@code process(writer)} call ends with
         * {@code after()}, which closes and nulls the writer and nulls the factory,
         * so the enclosing document cannot be continued after an include. The guards
         * on {@code processingXInclude} are commented out in {@code process}; confirm
         * before enabling this method.
         */
        protected void processXInclude() {
                String url = parser.getAttributeValue(null, "href"); // relative only
                if (url == null) {
                        // new File(parent, null) would throw a NullPointerException
                        println "Warning: include element without href attribute in "+inputurl
                        return;
                }

                File driver;
                try {
                        // decodes percent-escapes (e.g. %20), which URL.getFile() leaves in the path
                        driver = new File(inputurl.toURI());
                } catch (Exception e) {
                        // not a plain file: URL, fall back to the raw path as before
                        driver = new File(inputurl.getFile());
                }

                File ref = new File(driver.getParent(), url);
                if (!ref.exists()) {
                        println "Warning referenced file: $ref does not exists"
                        return;
                }

                URL includeurl = ref.toURI().toURL();
                println "process xi include: "+ref
                XMLStreamReader parserSave = this.parser // back up current parser
                InputStream xiIncludeInputData = includeurl.openStream();
                try {
                        parser = factory.createXMLStreamReader(xiIncludeInputData);
                        processingXInclude++ // to avoid recalling before & after methods
                        try {
                                this.process(writer);
                        } finally {
                                processingXInclude-- // end of XInclude processing
                        }
                } finally {
                        this.parser = parserSave // restore parser, also when the include failed
                        xiIncludeInputData.close(); // the included stream was never closed
                }
        }
363
        
364
        /**
         * Manual test: copies an XML file and prints whether the copy validates.
         *
         * @param args optional; {@code args[0]} is the input file and {@code args[1]}
         *        the output file. The original hard-coded paths are used for any
         *        argument that is not given.
         */
        public static void main(String[] args)
        {
                String inPath = (args != null && args.length > 0) ? args[0] : "/home/mdecorde/xml/xiinclude/master.xml"
                String outPath = (args != null && args.length > 1) ? args[1] : "/home/mdecorde/xml/xiinclude/merged.xml"
                File input = new File(inPath)
                File output = new File(outPath)
                if (!(input.exists() && input.canRead())) {
                        println "cannot found $input";
                        return;
                }
                def builder = new StaxIdentityParser(input.toURI().toURL());
                if (builder.process(output)) {
                        println "success ? "+ValidateXml.test(output);
                } else {
                        println "failure !"
                }
        }
379
}