Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / StaxIdentityParser.groovy @ 724

History | View | Annotate | Download (10.2 kB)

1
package org.txm.importer
2

    
3
import groovy.transform.CompileStatic
4
import java.io.File;
5
import javax.xml.stream.XMLStreamConstants;
6
import javax.xml.stream.XMLOutputFactory;
7
import javax.xml.stream.XMLStreamWriter;
8
import java.io.File;
9
import java.net.URL;
10
import javax.xml.stream.*;
11
import org.txm.importer.filters.*;
12
import org.txm.importer.graal.PersonalNamespaceContext;
13

    
14
class StaxIdentityParser {
15
        // ---- input side ----
        /** URL of the document being read; set by the constructor. */
        protected URL inputurl;
        /** Byte stream opened from {@link #inputurl}. */
        protected InputStream inputData;
        /** Factory used to create the StAX reader(s). */
        protected XMLInputFactory factory;
        /** StAX reader the events are pulled from. */
        protected XMLStreamReader parser;

        // ---- output side ----
        /** Factory used to create the StAX writer when the result goes to a file. */
        protected XMLOutputFactory outfactory = XMLOutputFactory.newInstance();
        /** Buffered file stream; only set when the result goes to a file. */
        protected BufferedOutputStream output;
        /** StAX writer every event is copied to. */
        protected XMLStreamWriter writer;

        // ---- namespaces ----
        public static String TXMNS = "http://textometrie.org/1.0";
        public static String TXM = "txm";
        public static String TEINS = "http://www.tei-c.org/ns/1.0";
        public static String TEI = "tei";
        /** Namespace context installed on writers created by this class and used to resolve element prefixes. */
        protected static PersonalNamespaceContext Nscontext = new PersonalNamespaceContext();

        // ---- parsing state ----
        /** Local name of the element whose start or end tag is currently being processed. */
        protected String localname;
        /** Incremented while an XInclude target is being processed, decremented afterwards. */
        int processingXInclude = 0;
35
        
36
        /**
         * Creates a parser reading the given file.
         * Delegates to {@link #StaxIdentityParser(URL)} with the file converted to a URL.
         *
         * @param infile the XML file to read
         */
        public StaxIdentityParser(File infile) {
                this(infile.toURI().toURL());
        }

        /**
         * Creates a parser reading the document behind the given URL.
         * The stream is opened and the StAX reader created immediately.
         *
         * @param inputurl location of the XML document to read
         */
        public StaxIdentityParser(URL inputurl) {
                this.inputurl = inputurl;
                this.inputData = inputurl.openStream();
                try {
                        this.factory = XMLInputFactory.newInstance();
                        this.parser = factory.createXMLStreamReader(inputData);
                } catch (Exception e) {
                        // the instance is never handed to the caller, so nobody else could
                        // close the stream that was opened above
                        try {
                                inputData.close();
                        } catch (IOException ignored) {
                                // the original failure is the one worth reporting
                        }
                        throw e;
                }
        }
46
        
47
        /**
         * Hook called by {@link #process(XMLStreamWriter)} before the first event is read.
         * Does nothing here.
         */
        protected void before() {
                // intentionally empty
        }
50
        
51
        /**
         * Called by {@link #process(XMLStreamWriter)} once the event loop has ended without error.
         * Closes the reader, flushes and closes the writer, closes the input stream and
         * drops the references to the factory, the writer and the reader.
         */
        protected void after() {
                factory = null;
                try {
                        if (parser != null) parser.close();
                        if (writer != null) {
                                // flush only when there is a writer: flushing first would throw on null
                                writer.flush();
                                writer.close();
                        }
                } finally {
                        // still release the input and reset the state when a close above failed
                        try {
                                if (inputData != null) inputData.close();
                        } finally {
                                writer = null;
                                parser = null;
                        }
                }
        }
60
        
61
        /**
         * Closes the StAX reader, if there is one.
         * The reader reference is null once {@link #after()} has run, hence the guard.
         */
        protected void closeForError() {
                if (parser != null) {
                        parser.close();
                }
        }
64
        
65
        /**
         * Opens the given file for writing and binds {@link #writer} to it.
         * A writer or output stream left over from a previous call is closed first.
         *
         * @param f the file the XML result is written to
         * @return true if the stream and the writer were created, false if an exception occurred
         */
        private boolean createOutput(File f) {
                try {
                        // release whatever a previous run left open
                        if (writer != null) {
                                writer.close();
                        }
                        if (output != null) {
                                output.close();
                        }

                        FileOutputStream fileStream = new FileOutputStream(f);
                        output = new BufferedOutputStream(fileStream, 16 * 1024);

                        // UTF-8 writer on top of the buffered file stream
                        writer = outfactory.createXMLStreamWriter(output, "UTF-8");
                        writer.setNamespaceContext(Nscontext);
                } catch (Exception e) {
                        System.out.println("Error: create output of "+f+": "+e);
                        return false;
                }
                return true;
        }
88
        
89
        /**
         * Copies the input document to the given file.
         * Writes the XML declaration, then lets {@link #process(XMLStreamWriter)} copy the events.
         *
         * @param outfile the file to write the result to
         * @return false if the output could not be created, if the copy failed or if the
         *         reader could not be closed; true otherwise
         */
        public boolean process(File outfile) {
                if (!createOutput(outfile)) {
                        return false;
                }

                boolean ret = false;
                try {
                        writer.writeStartDocument("UTF-8", "1.0");
                        writer.writeCharacters("\n");
                        ret = process(writer);
                } finally {
                        // also reached when one of the calls above throws, so that the file
                        // handle is not left open; writer is already null after a successful run
                        try {
                                if (writer != null) writer.close();
                        } finally {
                                if (output != null) output.close();
                        }
                }

                if (parser != null) {
                        try {
                                parser.close();
                        } catch (Exception e) {
                                println "parser exep: "+e;
                                return false;
                        }
                }
                return ret;
        }
110
        
111
        public final static String SLASH = "/"

        /**
         * Reads every event of the input and hands it to the matching process* method,
         * which by default writes it unchanged to the given writer.
         * {@link #before()} is called before the first event and {@link #after()} after the
         * last one; both are called unconditionally.
         *
         * @param awriter the writer receiving the events; becomes the current {@link #writer}
         * @return true if the whole document was processed, false if an exception was raised
         *         while reading or writing (the error is printed and the resources are closed)
         */
        public boolean process(XMLStreamWriter awriter) {
                this.writer = awriter;
                before(); // if you need to do something before reading the xml
                try {
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                switch (event) {
                                        case XMLStreamConstants.NAMESPACE:
                                                processNamespace();
                                                break;
                                        case XMLStreamConstants.START_ELEMENT:
                                                localname = parser.getLocalName();
                                                processStartElement();
                                                break;
                                        case XMLStreamConstants.CHARACTERS:
                                                processCharacters();
                                                break;
                                        case XMLStreamConstants.END_ELEMENT:
                                                localname = parser.getLocalName();
                                                processEndElement();
                                                break;
                                        case XMLStreamConstants.PROCESSING_INSTRUCTION:
                                                processProcessingInstruction();
                                                break;
                                        case XMLStreamConstants.DTD:
                                                processDTD();
                                                break;
                                        case XMLStreamConstants.CDATA:
                                                processCDATA();
                                                break;
                                        case XMLStreamConstants.COMMENT:
                                                processComment();
                                                break;
                                        case XMLStreamConstants.END_DOCUMENT:
                                                // NOTE(review): never reached, the loop stops as soon as
                                                // END_DOCUMENT is returned; kept as in the original
                                                processEndDocument();
                                                break;
                                        case XMLStreamConstants.ENTITY_REFERENCE:
                                                processEntityReference();
                                                break;
                                }
                        }
                } catch (Exception e) {
                        println("Unexpected error while parsing file "+inputurl+" : "+e);
                        println("Location line: "+parser.getLocation().getLineNumber()+" character: "+parser.getLocation().getColumnNumber());
                        org.txm.utils.logger.Log.printStackTrace(e);
                        if (writer != null) writer.close();
                        if (output != null) output.close();
                        parser.close();
                        // closing the reader does not close the stream it reads from
                        if (inputData != null) inputData.close();
                        return false;
                }
                after(); // if you need to do something before closing the parser();
                return true;
        }
172
        
173
        /**
         * Copies the content of the current element up to and including its end tag.
         * The start tag must already have been written by the caller. Nested start tags are
         * written by the non-overridable {@link #_processStartElement()} and end tags directly
         * with the writer; the other events go through the process* methods.
         * On error the problem is printed, the output stream (if any) and the reader are
         * closed, and the method returns normally.
         *
         * @param tagname local name of the element whose end tag stops the copy
         */
        public void goToEnd(String tagname) {
                int depth = 1; // the element opened by the caller
                try {
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                switch (event) {
                                        case XMLStreamConstants.NAMESPACE:
                                                processNamespace();
                                                break;
                                        case XMLStreamConstants.START_ELEMENT:
                                                depth++;
                                                localname = parser.getLocalName();
                                                _processStartElement();
                                                break;
                                        case XMLStreamConstants.CHARACTERS:
                                                processCharacters();
                                                break;
                                        case XMLStreamConstants.PROCESSING_INSTRUCTION:
                                                processProcessingInstruction();
                                                break;
                                        case XMLStreamConstants.DTD:
                                                processDTD();
                                                break;
                                        case XMLStreamConstants.CDATA:
                                                processCDATA();
                                                break;
                                        case XMLStreamConstants.COMMENT:
                                                processComment();
                                                break;
                                        case XMLStreamConstants.END_ELEMENT:
                                                depth--;
                                                localname = parser.getLocalName();
                                                writer.writeEndElement();
                                                if (depth == 0 && localname == tagname)
                                                        return;
                                                break;
                                        case XMLStreamConstants.END_DOCUMENT:
                                                // NOTE(review): never reached, the loop condition excludes END_DOCUMENT
                                                processEndDocument();
                                                break;
                                        case XMLStreamConstants.ENTITY_REFERENCE:
                                                processEntityReference();
                                                break;
                                }
                        }
                } catch (Exception e) {
                        println("Error while parsing file "+inputurl);
                        println("Location "+parser.getLocation());
                        org.txm.utils.logger.Log.printStackTrace(e);
                        // output is only set when writing to a file; without the guard a
                        // NullPointerException would replace the error reported above
                        if (output != null) output.close();
                        parser.close();
                        return;
                }
        }
233
        
234
        /**
         * Describes the current position of the reader.
         *
         * @return a "Line: x Col: y" string, or null when there is no reader
         */
        public def getLocation() {
                if (parser == null) {
                        return null;
                }
                return "Line: "+parser.getLocation().getLineNumber()+" Col: "+parser.getLocation().getColumnNumber()
        }
239
        
240
        /** Writes the namespace (prefix and URI) reported by the reader for the current event. */
        protected void processNamespace() {
                String prefix = parser.getPrefix();
                String uri = parser.getNamespaceURI();
                writer.writeNamespace(prefix, uri);
        }
243
        
244
        public static final String INCLUDE = "include"
        public static final String XI = "xi"

        /**
         * Writes the start tag of the current element: its name, its namespace declarations
         * and its attributes. The copy itself is done by {@link #_processStartElement()};
         * this method is the extension point for subclasses.
         * XInclude elements are not treated specially.
         */
        protected void processStartElement() {
                _processStartElement();
        }
265
        
266
        /**
         * Writes the start tag of the current element: its name, its namespace declarations
         * and its attributes. When the element has a prefix, the namespace URI is looked up
         * in {@link #Nscontext}.
         */
        private void _processStartElement() {
                String prefix = parser.getPrefix();
                boolean prefixed = prefix != null && prefix.length() > 0;
                if (prefixed) {
                        String uri = Nscontext.getNamespaceURI(prefix);
                        writer.writeStartElement(uri, localname);
                } else {
                        writer.writeStartElement(localname);
                }

                // namespace declarations carried by this start tag
                for (int n = 0; n < parser.getNamespaceCount(); n++) {
                        String declaredPrefix = parser.getNamespacePrefix(n);
                        String declaredUri = parser.getNamespaceURI(n);
                        writer.writeNamespace(declaredPrefix, declaredUri);
                }

                writeAttributes();
        }
284
        
285
        /** Passes every attribute of the current start tag to {@link #writeAttribute(String, String, String)}. */
        protected void writeAttributes() {
                for (int idx = 0; idx < parser.getAttributeCount(); idx++) {
                        String attPrefix = parser.getAttributePrefix(idx);
                        String attName = parser.getAttributeLocalName(idx);
                        String attValue = parser.getAttributeValue(idx);
                        writeAttribute(attPrefix, attName, attValue);
                }
        }
290
        
291
        /**
         * Writes one attribute. A non-empty prefix is joined to the name with a colon and
         * the result is written as the attribute name.
         *
         * @param prefix the attribute prefix, may be null or empty
         * @param name the attribute local name
         * @param value the attribute value
         */
        protected void writeAttribute(String prefix, String name, String value) {
                boolean unprefixed = (prefix == null || prefix.length() == 0);
                String attributeName = unprefixed ? name : (prefix+":"+name);
                writer.writeAttribute(attributeName, value);
        }
297
        
298
        /** Writes the text of the current event as character data. */
        protected void processCharacters() {
                String text = parser.getText();
                writer.writeCharacters(text);
        }
302
        
303
        /**
         * Writes the current processing instruction.
         * The reader may report no data for an instruction such as {@code <?target?>};
         * the target-only form of the writer is used in that case.
         */
        protected void processProcessingInstruction() {
                String target = parser.getPITarget();
                String data = parser.getPIData();
                if (data == null) {
                        writer.writeProcessingInstruction(target);
                } else {
                        writer.writeProcessingInstruction(target, data);
                }
        }
307
        
308
        /** Writes the text of the current DTD event as the document type declaration. */
        protected void processDTD() {
                String dtd = parser.getText();
                writer.writeDTD(dtd);
        }
312
        
313
        /** Writes the text of the current event as a CDATA section. */
        protected void processCDATA() {
                String cdata = parser.getText();
                writer.writeCData(cdata);
        }
317
        
318
        /** Writes the text of the current event as an XML comment. */
        protected void processComment() {
                String comment = parser.getText();
                writer.writeComment(comment);
        }
322
        
323
        /**
         * Writes the end tag matching the most recently opened element.
         * XInclude elements are not treated specially.
         */
        protected void processEndElement() {
                writer.writeEndElement();
        }
331
        
332
        /** Asks the writer to end the document. */
        protected void processEndDocument() {
                writer.writeEndDocument();
        }
335
        
336
        /** Writes an entity reference named after the local name of the current event. */
        protected void processEntityReference() {
                String entityName = parser.getLocalName();
                writer.writeEntityRef(entityName);
        }
339
        /**
         * Processes an XInclude element: the file named by its href attribute, resolved
         * against the directory of the input document, is parsed and copied to the
         * current writer in place of the element.
         * Prints a warning and does nothing when the href attribute is missing or the
         * referenced file does not exist.
         *
         * NOTE(review): the nested {@link #process(XMLStreamWriter)} call runs before() and
         * after() unconditionally, and after() closes the writer and the main input stream.
         * Confirm this before enabling XInclude handling again.
         */
        protected void processXInclude() {
                String url = parser.getAttributeValue(null, "href"); // relative only
                if (url == null) {
                        // new File(parent, null) would throw a NullPointerException
                        println "Warning: include element without href attribute"
                        return;
                }

                File driver = new File(inputurl.getFile());
                File ref = new File(driver.getParent(), url);
                if (!ref.exists()) {
                        println "Warning referenced file: $ref does not exists"
                        return;
                }

                URL includeurl = ref.toURI().toURL();
                println "process xi include: "+ref
                def parserSave = this.parser // back up current parser
                def xiIncludeInputData = includeurl.openStream();
                try {
                        parser = factory.createXMLStreamReader(xiIncludeInputData);
                        processingXInclude++ // marks that an include is being processed
                        try {
                                this.process(writer);
                        } finally {
                                processingXInclude-- // end of XInclude processing
                        }
                } finally {
                        // restore the main reader and release the include stream whatever happened
                        this.parser = parserSave
                        xiIncludeInputData.close();
                }
        }
362
        
363
        /**
         * Manual test entry point: copies an XML file and validates the result.
         *
         * @param args optional: args[0] is the input file and args[1] the output file;
         *             when fewer than two arguments are given the built-in test paths are used
         */
        public static void main(String[] args) {
                boolean pathsGiven = (args != null && args.length >= 2);
                File input = new File(pathsGiven ? args[0] : "/home/mdecorde/xml/xiinclude/master.xml")
                File output = new File(pathsGiven ? args[1] : "/home/mdecorde/xml/xiinclude/merged.xml")
                if (!(input.exists() && input.canRead())) {
                        println "cannot found $input";
                        return;
                }
                def builder = new StaxIdentityParser(input.toURI().toURL());
                if (builder.process(output)) {
                        println "success ? "+ValidateXml.test(output);
                } else {
                        println "failure !"
                }
        }
378
}