Révision 2981

tmp/org.txm.core/src/java/org/txm/importer/StaxIdentityParser.java (revision 2981)
14 14
import javax.xml.stream.XMLStreamReader;
15 15
import javax.xml.stream.XMLStreamWriter;
16 16

  
17
import org.txm.utils.logger.Log;
18

  
17 19
/**
18 20
 * XML Identity transformation. extends the processXYZ methods to do something
19
 * @deprecated use the  org.txm.xml.XMLProcessor instead. The XMLProcessor implements Hooks to manage XML Event or XML DOM while parsing
21
 * 
22
 * @deprecated use the org.txm.xml.XMLProcessor instead. The XMLProcessor implements Hooks to manage XML Event or XML DOM while parsing
20 23
 * @author mdecorde
21 24
 *
22 25
 */
26
@Deprecated
23 27
public class StaxIdentityParser {
24

  
28
	
25 29
	/** The input */
26 30
	protected URL inputurl;
27

  
31
	
28 32
	protected InputStream inputData;
29

  
33
	
30 34
	protected XMLInputFactory factory;
31

  
35
	
32 36
	protected XMLStreamReader parser;
33

  
37
	
34 38
	/** The output. */
35 39
	protected XMLOutputFactory outfactory = XMLOutputFactory.newInstance();
36

  
40
	
37 41
	protected BufferedOutputStream output;
38

  
42
	
39 43
	protected XMLStreamWriter writer;
40

  
44
	
41 45
	public static String TXMNS = "http://textometrie.org/1.0";
42

  
46
	
43 47
	public static String TXM = "txm";
44

  
48
	
45 49
	public static String TEINS = "http://www.tei-c.org/ns/1.0";
46

  
50
	
47 51
	public static String TEI = "tei";
48

  
52
	
49 53
	protected PersonalNamespaceContext Nscontext = new PersonalNamespaceContext();
50

  
54
	
51 55
	// protected StringBuilder currentXPath = new StringBuilder("/")
52 56
	protected String localname;
53

  
57
	
54 58
	int processingXInclude = 0;
55

  
59
	
56 60
	public StaxIdentityParser(File infile) throws IOException, XMLStreamException {
57 61
		this(infile.toURI().toURL());
58 62
	}
59

  
63
	
60 64
	public StaxIdentityParser(URL inputurl) throws IOException, XMLStreamException {
61 65
		this.inputurl = inputurl;
62 66
		this.inputData = inputurl.openStream();
......
64 68
		this.parser = factory.createXMLStreamReader(inputData);
65 69
		
66 70
	}
67

  
71
	
68 72
	/**
69 73
	 * Helper method to get an attribute value
70 74
	 * 
......
73 77
	 */
74 78
	public String getParserAttributeValue(String name) {
75 79
		if (name == null) return null;
76

  
80
		
77 81
		int c = parser.getAttributeCount();
78 82
		for (int i = 0; i < c; i++) {
79 83
			if (name.equals(parser.getAttributeLocalName(i))) {
80 84
				return parser.getAttributeValue(i);
81 85
			}
82 86
		}
83

  
87
		
84 88
		return null;
85 89
	}
86

  
90
	
87 91
	protected void before() {
88

  
92
		
89 93
	}
90

  
94
	
91 95
	protected void after() throws XMLStreamException, IOException {
92 96
		factory = null;
93 97
		if (parser != null) parser.close();
......
97 101
		writer = null;
98 102
		parser = null;
99 103
	}
100

  
104
	
101 105
	protected void closeForError() throws XMLStreamException, IOException {
102 106
		if (parser != null) parser.close();
103 107
		if (inputData != null) inputData.close();
104 108
	}
105

  
109
	
106 110
	/**
107 111
	 * Creates the output.
108 112
	 *
109 113
	 * @param outfile the outfile
110 114
	 * @return true, if successful
111 115
	 */
112
	private boolean createOutput(File f) {
116
	private boolean createOutput(File f, String encoding) {
113 117
		try {
114 118
			if (writer != null) // process from a file
115 119
				writer.close();
116 120
			if (output != null) // process from a file
117 121
				output.close();
118

  
122
			
119 123
			output = new BufferedOutputStream(new FileOutputStream(f), 16 * 1024);
120

  
121
			writer = outfactory.createXMLStreamWriter(output, "UTF-8");// create a new file
124
			
125
			writer = outfactory.createXMLStreamWriter(output, encoding);// create a new file
122 126
			writer.setNamespaceContext(Nscontext);
123 127
			return true;
124 128
		}
......
127 131
			return false;
128 132
		}
129 133
	}
130

  
134
	
135
	/**
136
	 * Default output encoding is UTF-8
137
	 * 
138
	 * @param outfile
139
	 * @return
140
	 * @throws XMLStreamException
141
	 * @throws IOException
142
	 */
131 143
	public boolean process(File outfile) throws XMLStreamException, IOException {
132
		if (!createOutput(outfile))
133
			return false;
134

  
135
//		//writer.writeStartDocument("UTF-8", "1.0");
136
//		writer.writeStartDocument();
137
//		writer.writeCharacters("\n");
138
		boolean ret = process(writer);
144
		return process(outfile, "UTF-8");
145
	}
146
	
147
	public boolean process(File outfile, String encoding) throws XMLStreamException, IOException {
148
		// //writer.writeStartDocument("UTF-8", "1.0");
149
		// writer.writeStartDocument();
150
		// writer.writeCharacters("\n");
151
		boolean ret = process(null, outfile, encoding);
139 152
		if (writer != null) {
140 153
			writer.close();
141 154
		}
......
147 160
				System.out.println("output excep: " + e);
148 161
			}
149 162
		}
150

  
163
		
151 164
		if (parser != null) {
152 165
			try {
153 166
				parser.close();
......
156 169
				System.out.println("parser excep: " + e);
157 170
			}
158 171
		}
159

  
172
		
160 173
		if (inputData != null) {
161 174
			try {
162 175
				inputData.close();
......
165 178
				System.out.println("inputData excep: " + e);
166 179
			}
167 180
		}
168

  
181
		
169 182
		return ret;
170 183
	}
171

  
184
	
172 185
	public final static String SLASH = "/";
186
	
173 187
	public boolean firstElementStarted = false;
188
	
189
	
190
	/**
191
	 * If the writer is given, the expected encoding is UTF-8
192
	 * 
193
	 * @param awriter
194
	 * @return
195
	 * @throws XMLStreamException
196
	 * @throws IOException
197
	 */
174 198
	public boolean process(XMLStreamWriter awriter) throws XMLStreamException, IOException {
175
		
199
		if (awriter == null) {
200
			Log.warning("Error: called without XMLStreamWriter.");
201
			return false;
202
		}
203
		return process(awriter, null, "UTF-8");
204
	}
205
	
206
	
207
	private boolean process(XMLStreamWriter awriter, File outfile, String encoding) throws XMLStreamException, IOException {
176 208
		firstElementStarted = false;
177 209
		this.writer = awriter;
210
		
178 211
		// if (processingXInclude == 0) {
179 212
		before(); // if you need to do something before parsing the XML
180 213
		// }
181 214
		try {
182 215
			for (int event = parser.getEventType(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
183 216
				switch (event) {
184
				case XMLStreamConstants.START_DOCUMENT:
185
					writer.writeStartDocument(parser.getCharacterEncodingScheme(), parser.getVersion());
186
					writer.writeCharacters("\n");
187
					break;
188
				case XMLStreamConstants.NAMESPACE:
189
					this.Nscontext.addNamespace(parser.getPrefix(), parser.getNamespaceURI());
190
					processNamespace();
191
					break;
192
				case XMLStreamConstants.START_ELEMENT:
193
					firstElementStarted = true;
194
					localname = parser.getLocalName();
195
					// currentXPath.append(SLASH)
196
					processStartElement();
197
					break;
198
				case XMLStreamConstants.NOTATION_DECLARATION:
199
					break;
200
				case XMLStreamConstants.SPACE:
201
					break;
202
				case XMLStreamConstants.CHARACTERS:
203
					processCharacters();
204
					break;
205
				case XMLStreamConstants.END_ELEMENT:
206
					localname = parser.getLocalName();
207
					processEndElement();
208
					// currentXPath.substring(0, currentXPath.length() - localname.length() -1)
209
					break;
210
				case XMLStreamConstants.PROCESSING_INSTRUCTION:
211
					processProcessingInstruction();
212
					break;
213
				case XMLStreamConstants.DTD:
214
					processDTD();
215
					break;
216
				case XMLStreamConstants.CDATA:
217
					processCDATA();
218
					break;
219
				case XMLStreamConstants.COMMENT:
220
					processComment();
221
					break;
222
				case XMLStreamConstants.END_DOCUMENT:
223
					processEndDocument();
224
					break;
225
				case XMLStreamConstants.ENTITY_REFERENCE:
226
					processEntityReference();
227
					break;
217
					case XMLStreamConstants.START_DOCUMENT:
218
						if (encoding == null) { // if encoding is not specified,
219
							encoding = parser.getCharacterEncodingScheme();
220
						}
221
						if (awriter == null) {
222
							if (!createOutput(outfile, encoding)) {
223
								return false;
224
							}
225
						}
226
						else {
227
							encoding = "UTF-8"; // if awriter is set encoding must be UTF-8
228
						}
229
						
230
						writer.writeStartDocument(encoding, parser.getVersion());
231
						writer.writeCharacters("\n");
232
						break;
233
					case XMLStreamConstants.NAMESPACE:
234
						this.Nscontext.addNamespace(parser.getPrefix(), parser.getNamespaceURI());
235
						processNamespace();
236
						break;
237
					case XMLStreamConstants.START_ELEMENT:
238
						firstElementStarted = true;
239
						localname = parser.getLocalName();
240
						// currentXPath.append(SLASH)
241
						processStartElement();
242
						break;
243
					case XMLStreamConstants.NOTATION_DECLARATION:
244
						break;
245
					case XMLStreamConstants.SPACE:
246
						break;
247
					case XMLStreamConstants.CHARACTERS:
248
						processCharacters();
249
						break;
250
					case XMLStreamConstants.END_ELEMENT:
251
						localname = parser.getLocalName();
252
						processEndElement();
253
						// currentXPath.substring(0, currentXPath.length() - localname.length() -1)
254
						break;
255
					case XMLStreamConstants.PROCESSING_INSTRUCTION:
256
						processProcessingInstruction();
257
						break;
258
					case XMLStreamConstants.DTD:
259
						processDTD();
260
						break;
261
					case XMLStreamConstants.CDATA:
262
						processCDATA();
263
						break;
264
					case XMLStreamConstants.COMMENT:
265
						processComment();
266
						break;
267
					case XMLStreamConstants.END_DOCUMENT:
268
						processEndDocument();
269
						break;
270
					case XMLStreamConstants.ENTITY_REFERENCE:
271
						processEntityReference();
272
						break;
228 273
				}
229 274
			}
230 275
		}
......
244 289
		// }
245 290
		return true;
246 291
	}
247

  
292
	
248 293
	/**
249 294
	 * The start element has already been written
250 295
	 * 
......
259 304
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
260 305
				// System.out.println("event "+event
261 306
				switch (event) {
262
				case XMLStreamConstants.NAMESPACE:
263
					processNamespace();
264
					break;
265
				case XMLStreamConstants.START_ELEMENT:
266
					elements++;
267
					localname = parser.getLocalName();
268
					// currentXPath.append(SLASH)
269
					_processStartElement();
270
					break;
271
				case XMLStreamConstants.CHARACTERS:
272
					processCharacters();
273
					break;
274
				case XMLStreamConstants.PROCESSING_INSTRUCTION:
275
					processProcessingInstruction();
276
					break;
277
				case XMLStreamConstants.DTD:
278
					processDTD();
279
					break;
280
				case XMLStreamConstants.CDATA:
281
					processCDATA();
282
					break;
283
				case XMLStreamConstants.COMMENT:
284
					processComment();
285
					break;
286
				case XMLStreamConstants.END_ELEMENT:
287
					elements--;
288
					localname = parser.getLocalName();
289
					// currentXPath.substring(0, currentXPath.length() - localname.length() -1)
290
					writer.writeEndElement();
291
					if (elements == 0 && localname == tagname)
292
						return;
293
					break;
294
				case XMLStreamConstants.END_DOCUMENT:
295
					processEndDocument();
296
					break;
297
				case XMLStreamConstants.ENTITY_REFERENCE:
298
					processEntityReference();
299
					break;
307
					case XMLStreamConstants.NAMESPACE:
308
						processNamespace();
309
						break;
310
					case XMLStreamConstants.START_ELEMENT:
311
						elements++;
312
						localname = parser.getLocalName();
313
						// currentXPath.append(SLASH)
314
						_processStartElement();
315
						break;
316
					case XMLStreamConstants.CHARACTERS:
317
						processCharacters();
318
						break;
319
					case XMLStreamConstants.PROCESSING_INSTRUCTION:
320
						processProcessingInstruction();
321
						break;
322
					case XMLStreamConstants.DTD:
323
						processDTD();
324
						break;
325
					case XMLStreamConstants.CDATA:
326
						processCDATA();
327
						break;
328
					case XMLStreamConstants.COMMENT:
329
						processComment();
330
						break;
331
					case XMLStreamConstants.END_ELEMENT:
332
						elements--;
333
						localname = parser.getLocalName();
334
						// currentXPath.substring(0, currentXPath.length() - localname.length() -1)
335
						writer.writeEndElement();
336
						if (elements == 0 && localname == tagname)
337
							return;
338
						break;
339
					case XMLStreamConstants.END_DOCUMENT:
340
						processEndDocument();
341
						break;
342
					case XMLStreamConstants.ENTITY_REFERENCE:
343
						processEntityReference();
344
						break;
300 345
				}
301 346
			}
302 347
		}
......
309 354
			return;
310 355
		}
311 356
	}
312

  
357
	
313 358
	public String getLocation() {
314 359
		if (parser != null)
315 360
			return "Line: " + parser.getLocation().getLineNumber() + " Col: " + parser.getLocation().getColumnNumber();
316 361
		return null;
317 362
	}
318

  
363
	
319 364
	protected void processNamespace() throws XMLStreamException {
320 365
		writer.writeNamespace(parser.getPrefix(), parser.getNamespaceURI());
321 366
	}
322

  
367
	
323 368
	public static final String INCLUDE = "include";
324

  
369
	
325 370
	public static final String XI = "xi";
326

  
371
	
327 372
	protected void processStartElement() throws XMLStreamException, IOException {
328 373
		String prefix = parser.getPrefix();
329 374
		if (INCLUDE == localname && XI == prefix) {
......
334 379
				writer.writeStartElement(Nscontext.getNamespaceURI(prefix), localname);
335 380
			else
336 381
				writer.writeStartElement(localname);
337

  
382
			
338 383
			for (int i = 0; i < parser.getNamespaceCount(); i++) {
339 384
				writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
340 385
			}
341

  
386
			
342 387
			writeAttributes();
343 388
		}
344 389
	}
345

  
390
	
346 391
	private void _processStartElement() throws XMLStreamException, IOException {
347 392
		String prefix = parser.getPrefix();
348 393
		if (INCLUDE == localname && XI == prefix) {
......
353 398
				writer.writeStartElement(Nscontext.getNamespaceURI(prefix), localname);
354 399
			else
355 400
				writer.writeStartElement(localname);
356

  
401
			
357 402
			for (int i = 0; i < parser.getNamespaceCount(); i++) {
358 403
				writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
359 404
			}
360

  
405
			
361 406
			writeAttributes();
362 407
		}
363 408
	}
364

  
409
	
365 410
	protected void writeAttributes() throws XMLStreamException {
366 411
		for (int i = 0; i < parser.getAttributeCount(); i++) {
367 412
			writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), parser.getAttributeValue(i));
368 413
		}
369 414
	}
370

  
415
	
371 416
	protected void writeAttribute(String prefix, String name, String value) throws XMLStreamException {
372 417
		if (prefix != null && prefix.length() > 0)
373 418
			writer.writeAttribute(prefix + ":" + name, value);
374 419
		else
375 420
			writer.writeAttribute(name, value);
376 421
	}
377

  
422
	
378 423
	protected void processCharacters() throws XMLStreamException {
379 424
		writer.writeCharacters(parser.getText());
380 425
	}
381

  
426
	
382 427
	protected void processProcessingInstruction() throws XMLStreamException {
383 428
		writer.writeProcessingInstruction(parser.getPITarget(), parser.getPIData());
384 429
		if (!firstElementStarted) writer.writeCharacters("\n"); // new lines are not reported before the first element is parsed
385 430
	}
386

  
431
	
387 432
	protected void processDTD() throws XMLStreamException {
388 433
		writer.writeDTD(parser.getText());
389 434
	}
390

  
435
	
391 436
	protected void processCDATA() throws XMLStreamException {
392 437
		writer.writeCData(parser.getText());
393 438
	}
394

  
439
	
395 440
	protected void processComment() throws XMLStreamException {
396 441
		writer.writeComment(parser.getText());
397 442
	}
398

  
443
	
399 444
	protected void processEndElement() throws XMLStreamException {
400 445
		if (localname == INCLUDE && parser.getPrefix() == XI) {
401 446
			// nothing !!
......
404 449
			writer.writeEndElement();
405 450
		}
406 451
	}
407

  
452
	
408 453
	protected void processEndDocument() throws XMLStreamException {
409 454
		writer.writeEndDocument();
410 455
	}
411

  
456
	
412 457
	protected void processEntityReference() throws XMLStreamException {
413 458
		writer.writeEntityRef(parser.getLocalName());
414 459
	}
415

  
460
	
416 461
	/**
417 462
	 * Process the XInclude elements
418 463
	 * 
......
440 485
			System.out.println("Warning referenced file: $ref does not exists");
441 486
		}
442 487
	}
443

  
488
	
444 489
	public static void main(String[] args) {
445 490
		try {
446 491
			File input = new File("/home/mdecorde/xml/identity/test.xml");
447
			File output = new File("/home/mdecorde/xml/identity/test-copy.xml");
492
			File outputFile = new File("/home/mdecorde/xml/identity/test-copy.xml");
448 493
			if (!(input.exists() && input.canRead())) {
449 494
				System.out.println("cannot found $input");
450 495
				return;
451 496
			}
452 497
			StaxIdentityParser builder;
453

  
498
			
454 499
			builder = new StaxIdentityParser(input.toURI().toURL());
455

  
456
			if (builder.process(output)) {
457
				System.out.println("success ? " + ValidateXml.test(output));
500
			
501
			if (builder.process(outputFile)) {
502
				System.out.println("success ? " + ValidateXml.test(outputFile));
458 503
			}
459 504
			else {
460 505
				System.out.println("failure !");
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 2981)
192 192
		cwbEn.setDebug(debug);
193 193
		cwbMa.setDebug(debug);
194 194

  
195
		String uAttr = "u:0+spkid+spk+scope+accent+s+time+check+dialect+type";
195
		String uAttr = "u:0+spkid+who+scope+accent+s+time+check+dialect+type";
196 196
		String textAttr ="text:0+base+project"
197 197
		if (trans != null) {
198 198
			for (String key : trans.keySet()) {
......
230 230
					cqpFile.getAbsolutePath(),
231 231
					registryFile.getAbsolutePath(), pAttributes, sAttributes);
232 232
			if (!registryFile.exists()) {
233
				println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
233
				println "Error: The registry file was not created: $registryFile. See https://groupes.renater.fr/wiki/txm-users/public/faq"
234 234
				return false;
235 235
			}
236 236
			cwbMa.run(corpusname, registryFile.getParent());
......
375 375
								String name = parser.getAttributeLocalName(i).replace("_","").toLowerCase()
376 376
								output.write(" "+name+"=\""+parser.getAttributeValue(i).replace("\"", "&quot;")+"\"");
377 377
								if (name == "time") formatedTime = parser.getAttributeValue(i)
378
								else if (name == "spk") u_name = parser.getAttributeValue(i)
378
								else if (name == "who") u_name = parser.getAttributeValue(i)
379 379
							}
380 380
							output.write ">\n"
381 381
							break;
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/TRSToTEI.groovy (revision 2981)
139 139
				writer.writeEndElement() // teiHeader
140 140
				writer.writeStartElement("text")
141 141
				writer.writeAttribute("id", textid)
142
				for(def k : informations.keySet()) writer.writeAttribute(k, informations[k])
142
				for (def k : informations.keySet()) writer.writeAttribute(k, informations[k])
143 143
				beforeBody = false; // end of info parsing
144 144
				break;
145 145
		}
......
289 289
		def attributes = speakers.get(vSpeaker)
290 290
		if (attributes == null) { // in case of Who@n wrong number
291 291
			if (vSpeaker.startsWith("#") && vSpeaker.endsWith("?")) { // don't show "N/A" vSpeaker
292
				writer.writeAttribute("spk", vSpeaker)
292
				writer.writeAttribute("who", vSpeaker)
293 293
				writer.writeAttribute("spkid", vSpeaker)
294 294
				writeAttributes();
295 295
			}
......
298 298
//				println " write attribute "+p.getFirst()+" "+p.getSecond()
299 299
				String attrn = p.getFirst().toString();
300 300
				if (attrn == "name") { // rename @name to @spk
301
					attrn = "spk"
301
					attrn = "who"
302 302
					u_name = p.getSecond();
303 303
				} else if (attrn == "id") { // rename @id to @spkid
304 304
					attrn = "spkid"
305 305
				}
306 306

  
307
				writer.writeAttribute(attrn,p.getSecond())
307
				writer.writeAttribute(attrn, p.getSecond())
308 308
			}
309 309
		}
310 310
		uOpened = true;
311 311
	}
312
	
312 313
	private String formatTime(String time) {
313 314
		try {
314 315
			return formatTime(Float.parseFloat(time))

Formats disponibles : Unified diff