Révision 2939
tmp/org.txm.core/src/java/org/txm/xml/XMLParser.java (revision 2939) | ||
---|---|---|
45 | 45 |
|
46 | 46 |
public static String TEI = "tei"; |
47 | 47 |
|
48 |
protected static PersonalNamespaceContext Nscontext = new PersonalNamespaceContext();
|
|
48 |
protected PersonalNamespaceContext Nscontext = new PersonalNamespaceContext(); |
|
49 | 49 |
|
50 | 50 |
protected StringBuilder currentXPath = new StringBuilder(""); |
51 | 51 |
|
... | ... | |
91 | 91 |
|
92 | 92 |
} |
93 | 93 |
|
94 |
protected void after() throws XMLStreamException, IOException { |
|
94 |
/** |
|
95 |
* |
|
96 |
* @param allWentWell true if the process ends well |
|
97 |
* |
|
98 |
* @throws XMLStreamException |
|
99 |
* @throws IOException |
|
100 |
*/ |
|
101 |
protected void after(boolean allWentWell) throws XMLStreamException, IOException { |
|
95 | 102 |
if (factory != null) { |
96 | 103 |
factory = null; |
97 | 104 |
|
... | ... | |
120 | 127 |
System.out.println("inputData excep: " + e); |
121 | 128 |
} |
122 | 129 |
} |
123 |
|
|
124 |
|
|
125 | 130 |
} |
126 | 131 |
} |
127 | 132 |
|
128 | 133 |
public final static String SLASH = "/"; |
129 | 134 |
|
135 |
/** |
|
136 |
* true if the first start element was parsed |
|
137 |
*/ |
|
138 |
protected boolean firstElementParsed = false; |
|
139 |
|
|
130 | 140 |
public final boolean process() throws XMLStreamException, IOException { |
131 | 141 |
|
132 | 142 |
// if (processingXInclude == 0) { |
133 | 143 |
before(); // if you need to do something before reading the xml |
134 | 144 |
// } |
145 |
|
|
146 |
firstElementParsed = false; |
|
147 |
|
|
135 | 148 |
try { |
136 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
|
|
149 |
for (int event = parser.getEventType(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
|
|
137 | 150 |
|
138 | 151 |
// preparing process |
139 | 152 |
if (event == XMLStreamConstants.START_ELEMENT) { |
153 |
firstElementParsed = true; |
|
140 | 154 |
localname = parser.getLocalName(); |
141 | 155 |
currentXPath.append(SLASH); |
142 | 156 |
currentXPath.append(localname); |
... | ... | |
185 | 199 |
System.out.println("Location line: " + parser.getLocation().getLineNumber() + " character: " + parser.getLocation().getColumnNumber()); |
186 | 200 |
org.txm.utils.logger.Log.printStackTrace(e); |
187 | 201 |
// e.printStackTrace(); |
202 |
after(false); // if you need to do something before closing the parser(); |
|
188 | 203 |
return false; |
189 | 204 |
} |
190 | 205 |
finally { |
191 |
after(); // if you need to do something before closing the parser(); |
|
206 |
after(true); // if you need to do something before closing the parser();
|
|
192 | 207 |
} |
193 | 208 |
|
194 | 209 |
return true; |
... | ... | |
196 | 211 |
|
197 | 212 |
protected final void processParserEvent(int event) throws XMLStreamException, IOException { |
198 | 213 |
switch (event) { |
214 |
case XMLStreamConstants.START_DOCUMENT: |
|
215 |
processStartDocument(); |
|
216 |
break; |
|
217 |
case XMLStreamConstants.SPACE: |
|
218 |
processSpace(); |
|
219 |
break; |
|
220 |
case XMLStreamConstants.NOTATION_DECLARATION: |
|
221 |
processNotationDeclaration(); |
|
222 |
break; |
|
199 | 223 |
case XMLStreamConstants.NAMESPACE: |
200 | 224 |
processNamespace(); |
201 | 225 |
break; |
... | ... | |
229 | 253 |
} |
230 | 254 |
} |
231 | 255 |
|
232 |
/** |
|
233 |
* The start element has already been written |
|
234 |
* |
|
235 |
* @param tagname |
|
236 |
* @throws XMLStreamException |
|
237 |
* @throws IOException |
|
238 |
*/ |
|
239 |
public void goToEnd(String tagname) throws XMLStreamException, IOException { |
|
240 |
int elements = 1; |
|
241 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) { |
|
242 |
// System.out.println("event "+event |
|
243 |
switch (event) { |
|
244 |
case XMLStreamConstants.NAMESPACE: |
|
245 |
processNamespace(); |
|
246 |
break; |
|
247 |
case XMLStreamConstants.START_ELEMENT: |
|
248 |
elements++; |
|
249 |
localname = parser.getLocalName(); |
|
250 |
currentXPath.append(SLASH); |
|
251 |
currentXPath.append(localname); |
|
252 |
buildCurrentAttributes(); |
|
253 |
|
|
254 |
processStartElement(); |
|
255 |
break; |
|
256 |
case XMLStreamConstants.CHARACTERS: |
|
257 |
processCharacters(); |
|
258 |
break; |
|
259 |
case XMLStreamConstants.PROCESSING_INSTRUCTION: |
|
260 |
processProcessingInstruction(); |
|
261 |
break; |
|
262 |
case XMLStreamConstants.DTD: |
|
263 |
processDTD(); |
|
264 |
break; |
|
265 |
case XMLStreamConstants.CDATA: |
|
266 |
processCDATA(); |
|
267 |
break; |
|
268 |
case XMLStreamConstants.COMMENT: |
|
269 |
processComment(); |
|
270 |
break; |
|
271 |
case XMLStreamConstants.END_ELEMENT: |
|
272 |
elements--; |
|
273 |
localname = parser.getLocalName(); |
|
274 |
|
|
275 |
currentXPath.setLength(currentXPath.length() - localname.length() - 1); |
|
276 |
|
|
277 |
if (elements == 0 && localname == tagname) |
|
278 |
return; |
|
279 |
break; |
|
280 |
case XMLStreamConstants.END_DOCUMENT: |
|
281 |
processEndDocument(); |
|
282 |
break; |
|
283 |
case XMLStreamConstants.ENTITY_REFERENCE: |
|
284 |
processEntityReference(); |
|
285 |
break; |
|
286 |
} |
|
287 |
} |
|
288 |
|
|
289 |
} |
|
290 |
|
|
291 | 256 |
public final String getLocation() { |
292 | 257 |
if (parser != null) { |
293 | 258 |
return "Line: " + parser.getLocation().getLineNumber() + " Col: " + parser.getLocation().getColumnNumber(); |
... | ... | |
313 | 278 |
return attributesStack.lastElement(); |
314 | 279 |
} |
315 | 280 |
|
281 |
protected void processStartDocument() throws XMLStreamException {} |
|
282 |
|
|
283 |
protected void processSpace() throws XMLStreamException {} |
|
284 |
|
|
285 |
protected void processNotationDeclaration() throws XMLStreamException {} |
|
286 |
|
|
316 | 287 |
protected void processNamespace() throws XMLStreamException {} |
317 | 288 |
|
318 | 289 |
protected void processStartElement() throws XMLStreamException, IOException {} |
tmp/org.txm.core/src/java/org/txm/xml/XMLProcessor.java (revision 2939) | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import java.io.BufferedOutputStream; |
4 | 4 |
import java.io.File; |
5 |
import java.io.FileNotFoundException; |
|
5 | 6 |
import java.io.FileOutputStream; |
6 | 7 |
import java.io.IOException; |
7 | 8 |
import java.net.URL; |
8 |
import java.util.Arrays; |
|
9 | 9 |
import java.util.LinkedHashMap; |
10 | 10 |
|
11 | 11 |
import javax.xml.stream.XMLOutputFactory; |
... | ... | |
14 | 14 |
import javax.xml.stream.XMLStreamWriter; |
15 | 15 |
|
16 | 16 |
/** |
17 |
* Parse and write a XML file. If no hook specified the file is not transformed |
|
17 |
* Parse and write an XML file. If no hook is specified, the file is not transformed (mimics the implementation of the old StaxIdentityParser)
|
|
18 | 18 |
* |
19 |
* See the available Hook and Activator classes to do more things |
|
20 |
* |
|
19 | 21 |
* @author mdecorde |
20 | 22 |
* |
21 | 23 |
*/ |
... | ... | |
28 | 30 |
|
29 | 31 |
protected XMLStreamWriter writer; |
30 | 32 |
|
33 |
/** |
|
34 |
* |
|
35 |
* @param infile the file to read |
|
36 |
* @throws IOException |
|
37 |
* @throws XMLStreamException |
|
38 |
*/ |
|
31 | 39 |
public XMLProcessor(File infile) throws IOException, XMLStreamException { |
32 | 40 |
this(infile.toURI().toURL()); |
33 | 41 |
} |
34 | 42 |
|
43 |
/** |
|
44 |
* |
|
45 |
* @param inputurl the input URL to read |
|
46 |
* @throws IOException |
|
47 |
* @throws XMLStreamException |
|
48 |
*/ |
|
35 | 49 |
public XMLProcessor(URL inputurl) throws IOException, XMLStreamException { |
36 | 50 |
super(inputurl); |
37 | 51 |
} |
38 | 52 |
|
39 | 53 |
/** |
40 |
* Helper method to get an attribute value by its name
|
|
54 |
* Helper method to get a Stax element attribute value using its name
|
|
41 | 55 |
* |
42 | 56 |
* @param name the attribute name |
43 | 57 |
* @return the value if any |
... | ... | |
46 | 60 |
public String getParserAttributeValue(String name) { |
47 | 61 |
if (name == null) return null; |
48 | 62 |
|
63 |
// try the no namespace attribute first |
|
64 |
if (parser.getAttributeValue(null, name) != null) { |
|
65 |
return parser.getAttributeValue(null, name); |
|
66 |
} |
|
67 |
|
|
68 |
//no luck try all attributes |
|
49 | 69 |
int c = parser.getAttributeCount(); |
50 | 70 |
for (int i = 0; i < c; i++) { |
51 | 71 |
if (name.equals(parser.getAttributeLocalName(i))) { |
... | ... | |
62 | 82 |
super.before(); |
63 | 83 |
} |
64 | 84 |
|
85 |
/** |
|
86 |
* closing parser and writer |
|
87 |
*/ |
|
65 | 88 |
@Override |
66 |
protected void after() throws XMLStreamException, IOException { |
|
89 |
protected void after(boolean allWentWell) throws XMLStreamException, IOException {
|
|
67 | 90 |
|
68 |
super.after(); |
|
69 |
|
|
70 |
if (factory != null) { |
|
71 |
factory = null; |
|
72 |
if (parser != null) { |
|
73 |
parser.close(); |
|
74 |
} |
|
75 |
writer.flush(); |
|
76 |
if (writer != null) { |
|
77 |
writer.close(); |
|
78 |
} |
|
79 |
if (inputData != null) { |
|
80 |
inputData.close(); |
|
81 |
} |
|
82 |
writer = null; |
|
83 |
parser = null; |
|
84 |
} |
|
85 |
|
|
91 |
super.after(allWentWell); // close parser, inputData and factory |
|
92 |
|
|
86 | 93 |
if (writer != null) writer.close(); |
87 | 94 |
|
88 | 95 |
if (output != null) output.close(); |
... | ... | |
93 | 100 |
* |
94 | 101 |
* @param outfile the outfile |
95 | 102 |
* @return true, if successful |
103 |
* @throws XMLStreamException |
|
104 |
* @throws IOException |
|
96 | 105 |
*/ |
97 |
private boolean createOutput(File f) { |
|
98 |
try { |
|
106 |
private boolean createOutput(File f) throws XMLStreamException, IOException { |
|
99 | 107 |
if (writer != null) { // process from a file |
100 | 108 |
writer.close(); |
101 | 109 |
} |
... | ... | |
108 | 116 |
writer = outfactory.createXMLStreamWriter(output, "UTF-8");// create a new file |
109 | 117 |
writer.setNamespaceContext(Nscontext); |
110 | 118 |
return true; |
111 |
} |
|
112 |
catch (Exception e) { |
|
113 |
System.out.println("Error: create output of " + f + ": " + e); |
|
114 |
return false; |
|
115 |
} |
|
116 | 119 |
} |
117 | 120 |
|
118 | 121 |
public boolean process(File outFile) throws XMLStreamException, IOException { |
... | ... | |
121 | 124 |
return false; |
122 | 125 |
} |
123 | 126 |
|
124 |
writer.writeStartDocument("UTF-8", "1.0"); |
|
125 |
writer.writeCharacters("\n"); |
|
126 | 127 |
|
127 | 128 |
boolean ret = process(writer); |
128 | 129 |
|
... | ... | |
145 | 146 |
/** |
146 | 147 |
* Go to the end of an element : |
147 | 148 |
* - The start element has already been written |
148 |
* - the content is skipped and not written
|
|
149 |
* - the content is skipped and written |
|
149 | 150 |
* |
151 |
* @return true if all went well |
|
150 | 152 |
* @param tagname |
151 | 153 |
* @throws XMLStreamException |
152 | 154 |
* @throws IOException |
153 | 155 |
*/ |
154 |
@Override |
|
155 |
public void goToEnd(String tagname) throws XMLStreamException, IOException { |
|
156 |
public boolean goToEnd(String tagname) throws XMLStreamException, IOException { |
|
156 | 157 |
int elements = 1; |
157 | 158 |
try { |
158 | 159 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) { |
159 | 160 |
// System.out.println("event "+event |
160 | 161 |
switch (event) { |
162 |
case XMLStreamConstants.START_DOCUMENT: |
|
163 |
processStartDocument(); |
|
164 |
break; |
|
165 |
case XMLStreamConstants.SPACE: |
|
166 |
processSpace(); |
|
167 |
break; |
|
168 |
case XMLStreamConstants.NOTATION_DECLARATION: |
|
169 |
processNotationDeclaration(); |
|
170 |
break; |
|
161 | 171 |
case XMLStreamConstants.NAMESPACE: |
162 | 172 |
processNamespace(); |
163 | 173 |
break; |
... | ... | |
193 | 203 |
|
194 | 204 |
currentXPath.setLength(currentXPath.length() - localname.length() - 1); |
195 | 205 |
|
196 |
if (elements == 0 && localname == tagname) |
|
197 |
return; |
|
206 |
if (elements == 0 && localname == tagname) { |
|
207 |
return false; |
|
208 |
} |
|
198 | 209 |
break; |
199 | 210 |
case XMLStreamConstants.END_DOCUMENT: |
200 | 211 |
processEndDocument(); |
... | ... | |
209 | 220 |
System.out.println("Error while parsing file " + inputurl); |
210 | 221 |
System.out.println("Location " + parser.getLocation()); |
211 | 222 |
org.txm.utils.logger.Log.printStackTrace(e); |
212 |
after(); |
|
213 |
return; |
|
223 |
after(false);
|
|
224 |
return false;
|
|
214 | 225 |
} |
226 |
return true; |
|
215 | 227 |
} |
216 | 228 |
|
229 |
protected void processStartDocument() throws XMLStreamException { |
|
230 |
writer.writeStartDocument(parser.getCharacterEncodingScheme(), parser.getVersion()); |
|
231 |
writer.writeCharacters("\n"); |
|
232 |
} |
|
233 |
|
|
217 | 234 |
@Override |
218 | 235 |
protected void processNamespace() throws XMLStreamException { |
236 |
this.Nscontext.addNamespace(parser.getPrefix(), parser.getNamespaceURI()); |
|
219 | 237 |
writer.writeNamespace(parser.getPrefix(), parser.getNamespaceURI()); |
220 | 238 |
} |
221 | 239 |
|
222 |
|
|
223 | 240 |
@Override |
224 | 241 |
protected void processStartElement() throws XMLStreamException, IOException { |
242 |
|
|
225 | 243 |
String prefix = parser.getPrefix(); |
226 | 244 |
if (prefix != null && prefix.length() > 0) { |
227 | 245 |
writer.writeStartElement(Nscontext.getNamespaceURI(prefix), localname); |
... | ... | |
260 | 278 |
@Override |
261 | 279 |
protected void processProcessingInstruction() throws XMLStreamException { |
262 | 280 |
writer.writeProcessingInstruction(parser.getPITarget(), parser.getPIData()); |
281 |
if (!firstElementParsed) writer.writeCharacters("\n"); |
|
263 | 282 |
} |
264 | 283 |
|
265 | 284 |
@Override |
... | ... | |
299 | 318 |
|
300 | 319 |
public static void main(String[] args) { |
301 | 320 |
try { |
302 |
File input = new File(System.getProperty("user.home"), "xml/tdm80j/tdm80j.xml");
|
|
303 |
File output = new File(System.getProperty("user.home"), "xml/tdm80j/out.xml");
|
|
321 |
File input = new File(System.getProperty("user.home"), "xml/identity/test.xml");
|
|
322 |
File output = new File(System.getProperty("user.home"), "xml/identity/test-copy.xml");
|
|
304 | 323 |
if (!(input.exists() && input.canRead())) { |
305 | 324 |
System.out.println("cannot found $input"); |
306 | 325 |
return; |
... | ... | |
329 | 348 |
// parentParser.writer.writeAttribute("hook", name); |
330 | 349 |
// } |
331 | 350 |
// }; |
332 |
XPathHookActivator xpathActivator = new XPathHookActivator(null, "//p"); |
|
333 |
XPathsHookActivator xpathsActivator = new XPathsHookActivator(null, Arrays.asList("//p", "//p")); |
|
334 |
IdentityHook hook2 = new IdentityHook("hook2", xpathsActivator, builder) { |
|
335 |
|
|
336 |
@Override |
|
337 |
public boolean _activate() { |
|
338 |
System.out.println("ACTIVATING " + name + " AT " + getLocation()); |
|
339 |
return true; |
|
340 |
} |
|
341 |
|
|
342 |
@Override |
|
343 |
public boolean deactivate() { |
|
344 |
System.out.println("DEACTIVATING " + name + " AT " + getLocation()); |
|
345 |
return true; |
|
346 |
} |
|
347 |
|
|
348 |
@Override |
|
349 |
public void processStartElement() throws XMLStreamException, IOException { |
|
350 |
super.processStartElement(); |
|
351 |
parentParser.writer.writeAttribute("hook", name); |
|
352 |
} |
|
353 |
}; |
|
354 |
|
|
355 |
// DOMIdentityHook hook3 = new DOMIdentityHook("hook3", xpathsActivator, builder) { |
|
351 |
// XPathHookActivator xpathActivator = new XPathHookActivator(null, "//p");
|
|
352 |
// XPathsHookActivator xpathsActivator = new XPathsHookActivator(null, Arrays.asList("//p", "//p"));
|
|
353 |
// IdentityHook hook2 = new IdentityHook("hook2", xpathsActivator, builder) {
|
|
354 |
// |
|
355 |
// @Override
|
|
356 |
// public boolean _activate() {
|
|
357 |
// System.out.println("ACTIVATING " + name + " AT " + getLocation());
|
|
358 |
// return true;
|
|
359 |
// }
|
|
360 |
// |
|
361 |
// @Override
|
|
362 |
// public boolean deactivate() {
|
|
363 |
// System.out.println("DEACTIVATING " + name + " AT " + getLocation());
|
|
364 |
// return true;
|
|
365 |
// }
|
|
366 |
// |
|
367 |
// @Override
|
|
368 |
// public void processStartElement() throws XMLStreamException, IOException {
|
|
369 |
// super.processStartElement();
|
|
370 |
// parentParser.writer.writeAttribute("hook", name);
|
|
371 |
// }
|
|
372 |
// };
|
|
373 |
// |
|
374 |
// // DOMIdentityHook hook3 = new DOMIdentityHook("hook3", xpathsActivator, builder) {
|
|
356 | 375 |
// |
357 | 376 |
// }; |
358 | 377 |
|
359 | 378 |
long time = System.currentTimeMillis(); |
360 | 379 |
if (builder.process(output)) { |
361 | 380 |
System.out.println("Time=" + (System.currentTimeMillis() - time)); |
362 |
System.out.println("XPaths activator has done working ? " + xpathsActivator.hasAllXpathsBeenProcessed()); |
|
381 |
//System.out.println("XPaths activator has done working ? " + xpathsActivator.hasAllXpathsBeenProcessed());
|
|
363 | 382 |
} |
364 | 383 |
else { |
365 | 384 |
System.out.println("failure !"); |
tmp/org.txm.core/src/java/org/txm/importer/PersonalNamespaceContext.java (revision 2939) | ||
---|---|---|
44 | 44 |
|
45 | 45 |
public static HashMap<String, String> adress2name = new HashMap<String, String>(); |
46 | 46 |
public static HashMap<String, String> name2adress = new HashMap<String, String>(); |
47 |
static { |
|
48 |
addNamespace("tei", "http://www.tei-c.org/ns/1.0"); |
|
49 |
addNamespace("me", "http://www.menota.org/ns/1.0"); |
|
50 |
addNamespace("bfm", "http://bfm.ens-lsh.fr/ns/1.0"); |
|
51 |
addNamespace("txm", "http://textometrie.org/1.0"); |
|
52 |
addNamespace("xi", "http://www.w3.org/2001/XInclude"); |
|
53 |
addNamespace("fn", "http://www.w3.org/2005/xpath-functions"); |
|
54 |
addNamespace("xhtml", "http://www.w3.org/1999/xhtml"); |
|
55 |
addNamespace("svg", "http://www.w3.org/2000/svg"); |
|
56 |
addNamespace(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI); |
|
57 |
} |
|
47 |
// static {
|
|
48 |
// addNamespace("tei", "http://www.tei-c.org/ns/1.0");
|
|
49 |
// addNamespace("me", "http://www.menota.org/ns/1.0");
|
|
50 |
// addNamespace("bfm", "http://bfm.ens-lsh.fr/ns/1.0");
|
|
51 |
// addNamespace("txm", "http://textometrie.org/1.0");
|
|
52 |
// addNamespace("xi", "http://www.w3.org/2001/XInclude");
|
|
53 |
// addNamespace("fn", "http://www.w3.org/2005/xpath-functions");
|
|
54 |
// addNamespace("xhtml", "http://www.w3.org/1999/xhtml");
|
|
55 |
// addNamespace("svg", "http://www.w3.org/2000/svg");
|
|
56 |
// addNamespace(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI);
|
|
57 |
// }
|
|
58 | 58 |
/* |
59 | 59 |
String TEINS = "http://www.tei-c.org/ns/1.0" |
60 | 60 |
String MENS = "http://www.menota.org/ns/1.0"; |
... | ... | |
79 | 79 |
super(); |
80 | 80 |
} |
81 | 81 |
|
82 |
public static void addNamespace(String name, String address) {
|
|
82 |
public void addNamespace(String name, String address) { |
|
83 | 83 |
adress2name.put(address, name); |
84 | 84 |
name2adress.put(name, address); |
85 | 85 |
} |
tmp/org.txm.core/src/java/org/txm/importer/StaxIdentityParser.java (revision 2939) | ||
---|---|---|
14 | 14 |
import javax.xml.stream.XMLStreamReader; |
15 | 15 |
import javax.xml.stream.XMLStreamWriter; |
16 | 16 |
|
17 |
/** |
|
18 |
* XML identity transformation. Extend the processXYZ methods to do something |
|
19 |
* @deprecated use the org.txm.xml.XMLProcessor instead. The XMLProcessor implements Hooks to manage XML Event or XML DOM while parsing |
|
20 |
* @author mdecorde |
|
21 |
* |
|
22 |
*/ |
|
17 | 23 |
public class StaxIdentityParser { |
18 |
|
|
24 |
|
|
19 | 25 |
/** The input */ |
20 | 26 |
protected URL inputurl; |
21 |
|
|
27 |
|
|
22 | 28 |
protected InputStream inputData; |
23 |
|
|
29 |
|
|
24 | 30 |
protected XMLInputFactory factory; |
25 |
|
|
31 |
|
|
26 | 32 |
protected XMLStreamReader parser; |
27 |
|
|
33 |
|
|
28 | 34 |
/** The output. */ |
29 | 35 |
protected XMLOutputFactory outfactory = XMLOutputFactory.newInstance(); |
30 |
|
|
36 |
|
|
31 | 37 |
protected BufferedOutputStream output; |
32 |
|
|
38 |
|
|
33 | 39 |
protected XMLStreamWriter writer; |
34 |
|
|
40 |
|
|
35 | 41 |
public static String TXMNS = "http://textometrie.org/1.0"; |
36 |
|
|
42 |
|
|
37 | 43 |
public static String TXM = "txm"; |
38 |
|
|
44 |
|
|
39 | 45 |
public static String TEINS = "http://www.tei-c.org/ns/1.0"; |
40 |
|
|
46 |
|
|
41 | 47 |
public static String TEI = "tei"; |
42 |
|
|
43 |
protected static PersonalNamespaceContext Nscontext = new PersonalNamespaceContext();
|
|
44 |
|
|
48 |
|
|
49 |
protected PersonalNamespaceContext Nscontext = new PersonalNamespaceContext(); |
|
50 |
|
|
45 | 51 |
// protected StringBuilder currentXPath = new StringBuilder("/") |
46 | 52 |
protected String localname; |
47 |
|
|
53 |
|
|
48 | 54 |
int processingXInclude = 0; |
49 |
|
|
55 |
|
|
50 | 56 |
public StaxIdentityParser(File infile) throws IOException, XMLStreamException { |
51 | 57 |
this(infile.toURI().toURL()); |
52 | 58 |
} |
53 |
|
|
59 |
|
|
54 | 60 |
public StaxIdentityParser(URL inputurl) throws IOException, XMLStreamException { |
55 | 61 |
this.inputurl = inputurl; |
56 | 62 |
this.inputData = inputurl.openStream(); |
57 | 63 |
this.factory = XMLInputFactory.newInstance(); |
58 | 64 |
this.parser = factory.createXMLStreamReader(inputData); |
65 |
|
|
59 | 66 |
} |
60 |
|
|
67 |
|
|
61 | 68 |
/** |
62 | 69 |
* Helper method to get an attribute value |
63 | 70 |
* |
... | ... | |
66 | 73 |
*/ |
67 | 74 |
public String getParserAttributeValue(String name) { |
68 | 75 |
if (name == null) return null; |
69 |
|
|
76 |
|
|
70 | 77 |
int c = parser.getAttributeCount(); |
71 | 78 |
for (int i = 0; i < c; i++) { |
72 | 79 |
if (name.equals(parser.getAttributeLocalName(i))) { |
73 | 80 |
return parser.getAttributeValue(i); |
74 | 81 |
} |
75 | 82 |
} |
76 |
|
|
83 |
|
|
77 | 84 |
return null; |
78 | 85 |
} |
79 |
|
|
86 |
|
|
80 | 87 |
protected void before() { |
81 |
|
|
88 |
|
|
82 | 89 |
} |
83 |
|
|
90 |
|
|
84 | 91 |
protected void after() throws XMLStreamException, IOException { |
85 | 92 |
factory = null; |
86 | 93 |
if (parser != null) parser.close(); |
... | ... | |
90 | 97 |
writer = null; |
91 | 98 |
parser = null; |
92 | 99 |
} |
93 |
|
|
100 |
|
|
94 | 101 |
protected void closeForError() throws XMLStreamException, IOException { |
95 | 102 |
if (parser != null) parser.close(); |
96 | 103 |
if (inputData != null) inputData.close(); |
97 | 104 |
} |
98 |
|
|
105 |
|
|
99 | 106 |
/** |
100 | 107 |
* Creates the output. |
101 | 108 |
* |
... | ... | |
108 | 115 |
writer.close(); |
109 | 116 |
if (output != null) // process from a file |
110 | 117 |
output.close(); |
111 |
|
|
118 |
|
|
112 | 119 |
output = new BufferedOutputStream(new FileOutputStream(f), 16 * 1024); |
113 |
|
|
120 |
|
|
114 | 121 |
writer = outfactory.createXMLStreamWriter(output, "UTF-8");// create a new file |
115 | 122 |
writer.setNamespaceContext(Nscontext); |
116 | 123 |
return true; |
... | ... | |
120 | 127 |
return false; |
121 | 128 |
} |
122 | 129 |
} |
123 |
|
|
130 |
|
|
124 | 131 |
public boolean process(File outfile) throws XMLStreamException, IOException { |
125 | 132 |
if (!createOutput(outfile)) |
126 | 133 |
return false; |
127 |
|
|
128 |
writer.writeStartDocument("UTF-8", "1.0"); |
|
129 |
writer.writeCharacters("\n"); |
|
134 |
|
|
135 |
// //writer.writeStartDocument("UTF-8", "1.0"); |
|
136 |
// writer.writeStartDocument(); |
|
137 |
// writer.writeCharacters("\n"); |
|
130 | 138 |
boolean ret = process(writer); |
131 | 139 |
if (writer != null) { |
132 | 140 |
writer.close(); |
... | ... | |
139 | 147 |
System.out.println("output excep: " + e); |
140 | 148 |
} |
141 | 149 |
} |
142 |
|
|
150 |
|
|
143 | 151 |
if (parser != null) { |
144 | 152 |
try { |
145 | 153 |
parser.close(); |
... | ... | |
148 | 156 |
System.out.println("parser excep: " + e); |
149 | 157 |
} |
150 | 158 |
} |
151 |
|
|
159 |
|
|
152 | 160 |
if (inputData != null) { |
153 | 161 |
try { |
154 | 162 |
inputData.close(); |
... | ... | |
157 | 165 |
System.out.println("inputData excep: " + e); |
158 | 166 |
} |
159 | 167 |
} |
160 |
|
|
168 |
|
|
161 | 169 |
return ret; |
162 | 170 |
} |
163 |
|
|
171 |
|
|
164 | 172 |
public final static String SLASH = "/"; |
165 |
|
|
173 |
public boolean firstElementStarted = false; |
|
166 | 174 |
public boolean process(XMLStreamWriter awriter) throws XMLStreamException, IOException { |
175 |
|
|
176 |
firstElementStarted = false; |
|
167 | 177 |
this.writer = awriter; |
168 | 178 |
// if (processingXInclude == 0) { |
169 |
before(); // if you need to do something before reading the xml
|
|
179 |
before(); // if you need to do something before parsing the XML
|
|
170 | 180 |
// } |
171 | 181 |
try { |
172 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
|
|
182 |
for (int event = parser.getEventType(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
|
|
173 | 183 |
switch (event) { |
174 |
case XMLStreamConstants.NAMESPACE: |
|
175 |
processNamespace(); |
|
176 |
break; |
|
177 |
case XMLStreamConstants.START_ELEMENT: |
|
178 |
localname = parser.getLocalName(); |
|
179 |
// currentXPath.append(SLASH) |
|
180 |
processStartElement(); |
|
181 |
break; |
|
182 |
case XMLStreamConstants.CHARACTERS: |
|
183 |
processCharacters(); |
|
184 |
break; |
|
185 |
case XMLStreamConstants.END_ELEMENT: |
|
186 |
localname = parser.getLocalName(); |
|
187 |
processEndElement(); |
|
188 |
// currentXPath.substring(0, currentXPath.length() - localname.length() -1) |
|
189 |
break; |
|
190 |
case XMLStreamConstants.PROCESSING_INSTRUCTION: |
|
191 |
processProcessingInstruction(); |
|
192 |
break; |
|
193 |
case XMLStreamConstants.DTD: |
|
194 |
processDTD(); |
|
195 |
break; |
|
196 |
case XMLStreamConstants.CDATA: |
|
197 |
processCDATA(); |
|
198 |
break; |
|
199 |
case XMLStreamConstants.COMMENT: |
|
200 |
processComment(); |
|
201 |
break; |
|
202 |
case XMLStreamConstants.END_DOCUMENT: |
|
203 |
processEndDocument(); |
|
204 |
break; |
|
205 |
case XMLStreamConstants.ENTITY_REFERENCE: |
|
206 |
processEntityReference(); |
|
207 |
break; |
|
184 |
case XMLStreamConstants.START_DOCUMENT: |
|
185 |
writer.writeStartDocument(parser.getCharacterEncodingScheme(), parser.getVersion()); |
|
186 |
writer.writeCharacters("\n"); |
|
187 |
break; |
|
188 |
case XMLStreamConstants.NAMESPACE: |
|
189 |
this.Nscontext.addNamespace(parser.getPrefix(), parser.getNamespaceURI()); |
|
190 |
processNamespace(); |
|
191 |
break; |
|
192 |
case XMLStreamConstants.START_ELEMENT: |
|
193 |
firstElementStarted = true; |
|
194 |
localname = parser.getLocalName(); |
|
195 |
// currentXPath.append(SLASH) |
|
196 |
processStartElement(); |
|
197 |
break; |
|
198 |
case XMLStreamConstants.NOTATION_DECLARATION: |
|
199 |
break; |
|
200 |
case XMLStreamConstants.SPACE: |
|
201 |
break; |
|
202 |
case XMLStreamConstants.CHARACTERS: |
|
203 |
processCharacters(); |
|
204 |
break; |
|
205 |
case XMLStreamConstants.END_ELEMENT: |
|
206 |
localname = parser.getLocalName(); |
|
207 |
processEndElement(); |
|
208 |
// currentXPath.substring(0, currentXPath.length() - localname.length() -1) |
|
209 |
break; |
|
210 |
case XMLStreamConstants.PROCESSING_INSTRUCTION: |
|
211 |
processProcessingInstruction(); |
|
212 |
break; |
|
213 |
case XMLStreamConstants.DTD: |
|
214 |
processDTD(); |
|
215 |
break; |
|
216 |
case XMLStreamConstants.CDATA: |
|
217 |
processCDATA(); |
|
218 |
break; |
|
219 |
case XMLStreamConstants.COMMENT: |
|
220 |
processComment(); |
|
221 |
break; |
|
222 |
case XMLStreamConstants.END_DOCUMENT: |
|
223 |
processEndDocument(); |
|
224 |
break; |
|
225 |
case XMLStreamConstants.ENTITY_REFERENCE: |
|
226 |
processEntityReference(); |
|
227 |
break; |
|
208 | 228 |
} |
209 | 229 |
} |
210 | 230 |
} |
... | ... | |
224 | 244 |
// } |
225 | 245 |
return true; |
226 | 246 |
} |
227 |
|
|
247 |
|
|
228 | 248 |
/** |
229 | 249 |
* The start element has already been written |
230 | 250 |
* |
... | ... | |
239 | 259 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) { |
240 | 260 |
// System.out.println("event "+event |
241 | 261 |
switch (event) { |
242 |
case XMLStreamConstants.NAMESPACE:
|
|
243 |
processNamespace();
|
|
244 |
break;
|
|
245 |
case XMLStreamConstants.START_ELEMENT:
|
|
246 |
elements++;
|
|
247 |
localname = parser.getLocalName();
|
|
248 |
// currentXPath.append(SLASH)
|
|
249 |
_processStartElement();
|
|
250 |
break;
|
|
251 |
case XMLStreamConstants.CHARACTERS:
|
|
252 |
processCharacters();
|
|
253 |
break;
|
|
254 |
case XMLStreamConstants.PROCESSING_INSTRUCTION:
|
|
255 |
processProcessingInstruction();
|
|
256 |
break;
|
|
257 |
case XMLStreamConstants.DTD:
|
|
258 |
processDTD();
|
|
259 |
break;
|
|
260 |
case XMLStreamConstants.CDATA:
|
|
261 |
processCDATA();
|
|
262 |
break;
|
|
263 |
case XMLStreamConstants.COMMENT:
|
|
264 |
processComment();
|
|
265 |
break;
|
|
266 |
case XMLStreamConstants.END_ELEMENT:
|
|
267 |
elements--;
|
|
268 |
localname = parser.getLocalName();
|
|
269 |
// currentXPath.substring(0, currentXPath.length() - localname.length() -1)
|
|
270 |
writer.writeEndElement();
|
|
271 |
if (elements == 0 && localname == tagname)
|
|
272 |
return;
|
|
273 |
break;
|
|
274 |
case XMLStreamConstants.END_DOCUMENT:
|
|
275 |
processEndDocument();
|
|
276 |
break;
|
|
277 |
case XMLStreamConstants.ENTITY_REFERENCE:
|
|
278 |
processEntityReference();
|
|
279 |
break;
|
|
262 |
case XMLStreamConstants.NAMESPACE: |
|
263 |
processNamespace(); |
|
264 |
break; |
|
265 |
case XMLStreamConstants.START_ELEMENT: |
|
266 |
elements++; |
|
267 |
localname = parser.getLocalName(); |
|
268 |
// currentXPath.append(SLASH) |
|
269 |
_processStartElement(); |
|
270 |
break; |
|
271 |
case XMLStreamConstants.CHARACTERS: |
|
272 |
processCharacters(); |
|
273 |
break; |
|
274 |
case XMLStreamConstants.PROCESSING_INSTRUCTION: |
|
275 |
processProcessingInstruction(); |
|
276 |
break; |
|
277 |
case XMLStreamConstants.DTD: |
|
278 |
processDTD(); |
|
279 |
break; |
|
280 |
case XMLStreamConstants.CDATA: |
|
281 |
processCDATA(); |
|
282 |
break; |
|
283 |
case XMLStreamConstants.COMMENT: |
|
284 |
processComment(); |
|
285 |
break; |
|
286 |
case XMLStreamConstants.END_ELEMENT: |
|
287 |
elements--; |
|
288 |
localname = parser.getLocalName(); |
|
289 |
// currentXPath.substring(0, currentXPath.length() - localname.length() -1) |
|
290 |
writer.writeEndElement(); |
|
291 |
if (elements == 0 && localname == tagname) |
|
292 |
return; |
|
293 |
break; |
|
294 |
case XMLStreamConstants.END_DOCUMENT: |
|
295 |
processEndDocument(); |
|
296 |
break; |
|
297 |
case XMLStreamConstants.ENTITY_REFERENCE: |
|
298 |
processEntityReference(); |
|
299 |
break; |
|
280 | 300 |
} |
281 | 301 |
} |
282 | 302 |
} |
... | ... | |
289 | 309 |
return; |
290 | 310 |
} |
291 | 311 |
} |
292 |
|
|
312 |
|
|
293 | 313 |
public String getLocation() { |
294 | 314 |
if (parser != null) |
295 | 315 |
return "Line: " + parser.getLocation().getLineNumber() + " Col: " + parser.getLocation().getColumnNumber(); |
296 | 316 |
return null; |
297 | 317 |
} |
298 |
|
|
318 |
|
|
299 | 319 |
protected void processNamespace() throws XMLStreamException { |
300 | 320 |
writer.writeNamespace(parser.getPrefix(), parser.getNamespaceURI()); |
301 | 321 |
} |
302 |
|
|
322 |
|
|
303 | 323 |
public static final String INCLUDE = "include"; |
304 |
|
|
324 |
|
|
305 | 325 |
public static final String XI = "xi"; |
306 |
|
|
326 |
|
|
307 | 327 |
protected void processStartElement() throws XMLStreamException, IOException { |
308 | 328 |
String prefix = parser.getPrefix(); |
309 | 329 |
if (INCLUDE == localname && XI == prefix) { |
... | ... | |
314 | 334 |
writer.writeStartElement(Nscontext.getNamespaceURI(prefix), localname); |
315 | 335 |
else |
316 | 336 |
writer.writeStartElement(localname); |
317 |
|
|
337 |
|
|
318 | 338 |
for (int i = 0; i < parser.getNamespaceCount(); i++) { |
319 | 339 |
writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i)); |
320 | 340 |
} |
321 |
|
|
341 |
|
|
322 | 342 |
writeAttributes(); |
323 | 343 |
} |
324 | 344 |
} |
325 |
|
|
345 |
|
|
326 | 346 |
private void _processStartElement() throws XMLStreamException, IOException { |
327 | 347 |
String prefix = parser.getPrefix(); |
328 | 348 |
if (INCLUDE == localname && XI == prefix) { |
... | ... | |
333 | 353 |
writer.writeStartElement(Nscontext.getNamespaceURI(prefix), localname); |
334 | 354 |
else |
335 | 355 |
writer.writeStartElement(localname); |
336 |
|
|
356 |
|
|
337 | 357 |
for (int i = 0; i < parser.getNamespaceCount(); i++) { |
338 | 358 |
writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i)); |
339 | 359 |
} |
340 |
|
|
360 |
|
|
341 | 361 |
writeAttributes(); |
342 | 362 |
} |
343 | 363 |
} |
344 |
|
|
364 |
|
|
345 | 365 |
protected void writeAttributes() throws XMLStreamException { |
346 | 366 |
for (int i = 0; i < parser.getAttributeCount(); i++) { |
347 | 367 |
writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), parser.getAttributeValue(i)); |
348 | 368 |
} |
349 | 369 |
} |
350 |
|
|
370 |
|
|
351 | 371 |
protected void writeAttribute(String prefix, String name, String value) throws XMLStreamException { |
352 | 372 |
if (prefix != null && prefix.length() > 0) |
353 | 373 |
writer.writeAttribute(prefix + ":" + name, value); |
354 | 374 |
else |
355 | 375 |
writer.writeAttribute(name, value); |
356 | 376 |
} |
357 |
|
|
377 |
|
|
358 | 378 |
protected void processCharacters() throws XMLStreamException { |
359 | 379 |
writer.writeCharacters(parser.getText()); |
360 | 380 |
} |
361 |
|
|
381 |
|
|
362 | 382 |
protected void processProcessingInstruction() throws XMLStreamException { |
363 | 383 |
writer.writeProcessingInstruction(parser.getPITarget(), parser.getPIData()); |
384 |
if (!firstElementStarted) writer.writeCharacters("\n"); // new lines are not reported before the first element is parsed |
|
364 | 385 |
} |
365 |
|
|
386 |
|
|
366 | 387 |
protected void processDTD() throws XMLStreamException { |
367 | 388 |
writer.writeDTD(parser.getText()); |
368 | 389 |
} |
369 |
|
|
390 |
|
|
370 | 391 |
protected void processCDATA() throws XMLStreamException { |
371 | 392 |
writer.writeCData(parser.getText()); |
372 | 393 |
} |
373 |
|
|
394 |
|
|
374 | 395 |
protected void processComment() throws XMLStreamException { |
375 | 396 |
writer.writeComment(parser.getText()); |
376 | 397 |
} |
377 |
|
|
398 |
|
|
378 | 399 |
protected void processEndElement() throws XMLStreamException { |
379 | 400 |
if (localname == INCLUDE && parser.getPrefix() == XI) { |
380 | 401 |
// nothing !! |
... | ... | |
383 | 404 |
writer.writeEndElement(); |
384 | 405 |
} |
385 | 406 |
} |
386 |
|
|
407 |
|
|
387 | 408 |
protected void processEndDocument() throws XMLStreamException { |
388 | 409 |
writer.writeEndDocument(); |
389 | 410 |
} |
390 |
|
|
411 |
|
|
391 | 412 |
protected void processEntityReference() throws XMLStreamException { |
392 | 413 |
writer.writeEntityRef(parser.getLocalName()); |
393 | 414 |
} |
394 |
|
|
415 |
|
|
395 | 416 |
/** |
396 | 417 |
* Process the XInclude elements |
397 | 418 |
* |
... | ... | |
419 | 440 |
System.out.println("Warning referenced file: $ref does not exists"); |
420 | 441 |
} |
421 | 442 |
} |
422 |
|
|
443 |
|
|
423 | 444 |
public static void main(String[] args) { |
424 | 445 |
try { |
425 | 446 |
File input = new File("/home/mdecorde/xml/identity/test.xml"); |
... | ... | |
429 | 450 |
return; |
430 | 451 |
} |
431 | 452 |
StaxIdentityParser builder; |
432 |
|
|
453 |
|
|
433 | 454 |
builder = new StaxIdentityParser(input.toURI().toURL()); |
434 |
|
|
455 |
|
|
435 | 456 |
if (builder.process(output)) { |
436 | 457 |
System.out.println("success ? " + ValidateXml.test(output)); |
437 | 458 |
} |
Formats disponibles : Unified diff