58 |
58 |
{
|
59 |
59 |
/** The dir. */
|
60 |
60 |
private def dir;
|
61 |
|
|
|
61 |
|
62 |
62 |
/** The convert all attributes. */
|
63 |
63 |
private boolean convertAllAttributes = false;
|
64 |
|
|
|
64 |
|
65 |
65 |
/** The corresp type. */
|
66 |
66 |
HashMap<String,String> correspType;
|
67 |
|
|
|
67 |
|
68 |
68 |
/** The corresp ref. */
|
69 |
69 |
HashMap<String,String> correspRef;
|
70 |
|
|
|
70 |
|
71 |
71 |
/** Element names to watch for, mapped to whether each one has been encountered while parsing. */
|
72 |
72 |
HashMap<String,Boolean> checkTags = new HashMap<String,Boolean>();
|
73 |
|
|
|
73 |
|
74 |
74 |
/** The resp id. */
|
75 |
75 |
def respId = [];
|
76 |
|
|
|
76 |
|
77 |
77 |
/** The applications. */
|
78 |
78 |
HashMap<String,File> applications;
|
79 |
|
|
|
79 |
|
80 |
80 |
/** The taxonomies. */
|
81 |
81 |
HashMap<String,String[]> taxonomies;
|
82 |
|
|
|
82 |
|
83 |
83 |
/** The resps. */
|
84 |
84 |
HashMap<String,String[]> resps;
|
85 |
|
|
|
85 |
|
86 |
86 |
/** The items. */
|
87 |
87 |
HashMap<String,HashMap<String,String>> items;
|
88 |
|
|
|
88 |
|
89 |
89 |
/** The XML header added flag. */
|
90 |
90 |
boolean XMLHeaderadded = false;
|
91 |
91 |
String textname;
|
92 |
92 |
String wtag = "w";
|
93 |
|
|
|
93 |
|
94 |
94 |
public static final String TEXT = "text"
|
95 |
95 |
public static final String ID = "id"
|
96 |
|
|
|
96 |
|
97 |
97 |
/**
|
98 |
98 |
* Instantiates a new xml2 ana.
|
99 |
99 |
*
|
... | ... | |
107 |
107 |
int idx = textname.lastIndexOf(".");
|
108 |
108 |
if (idx > 0)
|
109 |
109 |
textname = textname.substring(0, idx)
|
110 |
|
|
111 |
|
|
|
110 |
|
|
111 |
|
112 |
112 |
checkTags.put("respStmt",false);
|
113 |
113 |
checkTags.put("titleStmt",false);
|
114 |
114 |
checkTags.put("appInfo",false);
|
115 |
|
|
|
115 |
|
116 |
116 |
hasText = new HasElement(file, TEXT).process();
|
117 |
117 |
}
|
118 |
|
|
|
118 |
|
119 |
119 |
/**
|
120 |
120 |
* Sets the convert all attributes flag.
|
121 |
121 |
*
|
... | ... | |
125 |
125 |
public setConvertAllAtrtibutes(boolean value) {
	// Store the flag that is consulted when the attributes of a word element are processed.
	// The assignment stays the last expression so the implicit return value is unchanged.
	this.convertAllAttributes = value
}
|
128 |
|
|
|
128 |
|
129 |
129 |
/**
|
130 |
130 |
* Sets the word tag, i.e. the local name of the elements handled as words.
|
131 |
131 |
*
|
... | ... | |
135 |
135 |
public setWordTag(String wtag) {
	// The parameter shadows the field of the same name, hence the explicit "this".
	// The new value is the one matched by "case wtag" when start elements are dispatched.
	this.wtag = wtag;
}
|
138 |
|
|
|
138 |
|
139 |
139 |
/** Word counter: incremented in the word-tag case of the start-element handling. */
int idcount = 0;
|
140 |
140 |
/** Set to true once the txm:form element of a word has been opened and reset to false when the word end is handled; while true, character data is written trimmed. */
boolean flagWord = false;
|
141 |
141 |
/** Number of start elements processed so far; a value of 1 identifies the first element, which is tested against "TEI". */
int firstElement = 0;
|
... | ... | |
149 |
149 |
// println "checkTags=$checkTags";
|
150 |
150 |
// println "parser=$parser";
|
151 |
151 |
firstElement++;
|
152 |
|
|
|
152 |
|
153 |
153 |
if (this.checkTags.containsKey(parser.getLocalName())) {
|
154 |
154 |
this.checkTags.put(parser.getLocalName(), true);
|
155 |
155 |
}
|
156 |
|
|
|
156 |
|
157 |
157 |
switch (parser.getLocalName()) {
|
158 |
158 |
case wtag:
|
159 |
159 |
if (!hasText) {
|
... | ... | |
164 |
164 |
}
|
165 |
165 |
idcount++; // increment word counter
|
166 |
166 |
anabalises.clear();
|
167 |
|
|
|
167 |
|
168 |
168 |
writer.writeStartElement(parser.getLocalName()); // write w
|
169 |
|
|
|
169 |
|
170 |
170 |
for (int i = 0 ; i < parser.getNamespaceCount() ; i++) // write namespaces
|
171 |
171 |
writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
|
172 |
|
|
|
172 |
|
173 |
173 |
for (int i= 0 ; i < parser.getAttributeCount() ;i++ ) { // transform attributes
|
174 |
174 |
String type = parser.getAttributeLocalName(i);
|
175 |
175 |
String value = parser.getAttributeValue(i);
|
... | ... | |
190 |
190 |
// else {
|
191 |
191 |
// wordid = "w_"+textname+"_"+wordid;
|
192 |
192 |
// }
|
193 |
|
|
|
193 |
|
194 |
194 |
wordid = AsciiUtils.buildWordId(wordid); // remove characters not compatible with the id attribute value
|
195 |
|
|
|
195 |
|
196 |
196 |
writer.writeAttribute(type, wordid);
|
197 |
|
|
|
197 |
|
198 |
198 |
} else { // add attributes that was in the original <w>
|
199 |
199 |
if (convertAllAttributes)
|
200 |
200 |
anabalises.add(["#src", "#"+type, value])
|
... | ... | |
202 |
202 |
writer.writeAttribute(type, value);
|
203 |
203 |
}
|
204 |
204 |
}
|
205 |
|
|
|
205 |
|
206 |
206 |
flagWord = true; // start to capture the form
|
207 |
207 |
writer.writeStartElement(TXMNS, "form");
|
208 |
208 |
break;
|
209 |
|
|
|
209 |
|
210 |
210 |
case "TEI":
|
211 |
211 |
super.processStartElement();
|
212 |
212 |
boolean hasTeiNS = false;
|
... | ... | |
223 |
223 |
if (!hasTXMNs)
|
224 |
224 |
writer.writeNamespace(TXM, TXMNS);
|
225 |
225 |
break;
|
226 |
|
|
|
226 |
|
227 |
227 |
default:
|
228 |
|
|
|
228 |
|
229 |
229 |
if (TEXT.equals(localname)) {
|
230 |
230 |
hasText = true;
|
231 |
231 |
}
|
232 |
|
|
|
232 |
|
233 |
233 |
if (firstElement == 1) { // test if first element is TEI
|
234 |
234 |
//println "first tag: "+parser.getLocalName()
|
235 |
235 |
if (localname != "TEI") { // "TEI" is missing
|
... | ... | |
255 |
255 |
textElementAdded = true;
|
256 |
256 |
hasText = true;
|
257 |
257 |
}
|
258 |
|
|
|
258 |
|
259 |
259 |
super.processStartElement();
|
260 |
260 |
if (TEXT.equals(localname)) {
|
261 |
261 |
if (!parser.getAttributeValue(null, ID)) {
|
... | ... | |
264 |
264 |
}
|
265 |
265 |
}
|
266 |
266 |
}
|
267 |
|
|
268 |
|
protected void after()
|
269 |
|
{
|
|
267 |
|
|
268 |
protected void after() {
|
270 |
269 |
if (textElementAdded) {
|
271 |
270 |
writer.writeEndElement(); // text
|
272 |
271 |
}
|
... | ... | |
275 |
274 |
}
|
276 |
275 |
super.after(); // close writer, parser, etc
|
277 |
276 |
}
|
278 |
|
|
|
277 |
|
279 |
278 |
protected void addTEIElement()
|
280 |
279 |
{
|
281 |
280 |
writer.writeStartElement("TEI");
|
... | ... | |
284 |
283 |
writer.writeNamespace(TEI, TEINS);
|
285 |
284 |
writeTeiHeader();
|
286 |
285 |
}
|
287 |
|
|
288 |
|
protected void processCharacters()
|
|
286 |
|
|
287 |
protected void processCharacters()
|
289 |
288 |
{
|
290 |
289 |
if (flagWord) {
|
291 |
290 |
writer.writeCharacters(parser.getText().trim()); // keep form in 1 line
|
... | ... | |
293 |
292 |
super.processCharacters();
|
294 |
293 |
}
|
295 |
294 |
}
|
296 |
|
|
|
295 |
|
297 |
296 |
/** Set to true in the "classDecl" case of the end-element switch, just before the TXM taxonomies are written. */
boolean hasClassDecl = false;
|
298 |
297 |
/** Set to true in the "fileDesc" case of the end-element switch, just before the TXM resps are written. */
boolean hasFileDesc = false;
|
299 |
298 |
/** Set to true just before writeContentOfEncodingDesc() is called from the end-element switch; checked in the "teiHeader" case to decide whether that content still has to be written. */
boolean hasEncodingDesc = false;
|
... | ... | |
315 |
314 |
writer.writeCharacters(values[2]);
|
316 |
315 |
writer.writeEndElement(); // txm:ana
|
317 |
316 |
}
|
318 |
|
|
|
317 |
|
319 |
318 |
flagWord = false;
|
320 |
319 |
break;
|
321 |
|
|
|
320 |
|
322 |
321 |
case "fileDesc":
|
323 |
322 |
hasFileDesc = true;
|
324 |
323 |
this.writeTXMResps();
|
325 |
324 |
break;
|
326 |
|
|
|
325 |
|
327 |
326 |
case "classDecl":
|
328 |
327 |
hasClassDecl=true;
|
329 |
328 |
this.writeTXMTaxonomies();
|
... | ... | |
332 |
331 |
hasEncodingDesc = true;
|
333 |
332 |
writeContentOfEncodingDesc();
|
334 |
333 |
break;
|
335 |
|
|
|
334 |
|
336 |
335 |
case "teiHeader":
|
337 |
336 |
hasTeiHeader = true
|
338 |
337 |
if (!hasEncodingDesc) {
|
... | ... | |
340 |
339 |
writeContentOfEncodingDesc();
|
341 |
340 |
writer.writeEndElement();
|
342 |
341 |
}
|
343 |
|
|
|
342 |
|
344 |
343 |
break;
|
345 |
344 |
case "TEI":
|
346 |
345 |
hasTEI = true;
|
... | ... | |
349 |
348 |
}
|
350 |
349 |
break;
|
351 |
350 |
}
|
352 |
|
|
|
351 |
|
353 |
352 |
super.processEndElement();
|
354 |
353 |
}
|
355 |
|
|
|
354 |
|
356 |
355 |
protected void writeTeiHeader()
|
357 |
356 |
{
|
358 |
357 |
writer.writeStartElement("teiHeader");
|
... | ... | |
372 |
371 |
writer.writeEndElement(); // encodingDesc
|
373 |
372 |
writer.writeEndElement(); // teiHeader
|
374 |
373 |
}
|
375 |
|
|
|
374 |
|
376 |
375 |
protected void writeContentOfEncodingDesc()
|
377 |
376 |
{
|
378 |
377 |
writer.writeStartElement("appInfo")
|
... | ... | |
384 |
383 |
writer.writeEndElement(); // classDecl
|
385 |
384 |
}
|
386 |
385 |
}
|
387 |
|
|
|
386 |
|
388 |
387 |
/**
|
389 |
388 |
* Check resp.
|
390 |
389 |
*
|
... | ... | |
397 |
396 |
rez += "\t"+key+"\n";
|
398 |
397 |
return rez;
|
399 |
398 |
}
|
400 |
|
|
|
399 |
|
401 |
400 |
/**
|
402 |
401 |
* Sets the correspondances.
|
403 |
402 |
*
|
... | ... | |
409 |
408 |
this.correspRef = correspRef;
|
410 |
409 |
this.correspType = correspType;
|
411 |
410 |
}
|
412 |
|
|
|
411 |
|
413 |
412 |
/**
|
414 |
413 |
* Sets the header infos.
|
415 |
414 |
*
|
... | ... | |
427 |
426 |
this.taxonomies = taxonomies;
|
428 |
427 |
this.items = items;
|
429 |
428 |
}
|
430 |
|
|
|
429 |
|
431 |
430 |
/**
|
432 |
431 |
* Write txm resps.
|
433 |
432 |
*/
|
... | ... | |
451 |
450 |
writer.writeEndElement(); //respStmt
|
452 |
451 |
}
|
453 |
452 |
}
|
454 |
|
|
|
453 |
|
455 |
454 |
/**
|
456 |
455 |
* Write txm apps.
|
457 |
456 |
*/
|
... | ... | |
462 |
461 |
String ident = list.get(0);
|
463 |
462 |
String version = list.get(1);
|
464 |
463 |
File report = list.get(2);
|
465 |
|
|
|
464 |
|
466 |
465 |
writer.writeStartElement(TXMNS, "application");
|
467 |
466 |
writer.writeAttribute("ident", ident);
|
468 |
467 |
writer.writeAttribute("version", version);
|
469 |
468 |
writer.writeAttribute(RESP, ref);
|
470 |
|
|
|
469 |
|
471 |
470 |
//get txm:commandLine from GeneratedReport
|
472 |
471 |
if (report != null) {
|
473 |
472 |
writer.writeCharacters("");writer.flush();
|
... | ... | |
480 |
479 |
}
|
481 |
480 |
reader.close();
|
482 |
481 |
}
|
483 |
|
|
|
482 |
|
484 |
483 |
writer.writeStartElement("ab");
|
485 |
484 |
writer.writeAttribute(TYPE, "annotation");
|
486 |
485 |
for (String item : taxonomies.get(ref)) {
|
... | ... | |
494 |
493 |
writer.writeEndElement(); // txm:application
|
495 |
494 |
}
|
496 |
495 |
}
|
497 |
|
|
|
496 |
|
498 |
497 |
/**
|
499 |
498 |
* Write txm taxonomies.
|
500 |
499 |
*/
|
... | ... | |
503 |
502 |
for (String tax : items.keySet()) {
|
504 |
503 |
writer.writeStartElement("taxonomy");
|
505 |
504 |
writer.writeAttribute(ID, tax);
|
506 |
|
|
|
505 |
|
507 |
506 |
writer.writeStartElement("bibl");
|
508 |
507 |
writer.writeAttribute(TYPE, "tagset");
|
509 |
508 |
writer.writeStartElement("title");
|
510 |
509 |
writer.writeCharacters(tax);
|
511 |
510 |
writer.writeEndElement(); // title
|
512 |
|
|
|
511 |
|
513 |
512 |
for (String type : items.get(tax).keySet()) {
|
514 |
513 |
writer.writeEmptyElement("ref");
|
515 |
514 |
writer.writeAttribute(TYPE, type);
|
... | ... | |
519 |
518 |
writer.writeEndElement(); // taxonomy
|
520 |
519 |
}
|
521 |
520 |
}
|
522 |
|
|
|
521 |
|
523 |
522 |
/**
|
524 |
523 |
* The main method.
|
525 |
524 |
*
|
526 |
525 |
* @param args the arguments
|
527 |
526 |
*/
|
528 |
527 |
public static void main(String[] args) {
|
529 |
|
|
|
528 |
|
530 |
529 |
String rootDir = "~/xml/rgaqcj/";
|
531 |
530 |
new File(rootDir+"anainline/").mkdir();
|
532 |
|
|
|
531 |
|
533 |
532 |
ArrayList<String> milestones = new ArrayList<String>();
|
534 |
|
|
|
533 |
|
535 |
534 |
String file = "roland-p5.xml";
|
536 |
535 |
String anafile = "roland-p5.xml";
|
537 |
|
|
|
536 |
|
538 |
537 |
def correspType = new HashMap<String,String>()
|
539 |
538 |
// correspType(attribut word wlx, attribut type de la propriété ana du w txm)
|
540 |
539 |
correspType.put("p2","CATTEX2009");
|
541 |
|
|
|
540 |
|
542 |
541 |
def correspRef = new HashMap<String,String>()
|
543 |
542 |
// correspRef (attribut word wlx, attribut ref de la propriété ana du w txm. ref pointe vers l'identifiant du respStmt du TEIheader)
|
544 |
543 |
correspRef.put("p2","ctx1");
|
545 |
|
|
|
544 |
|
546 |
545 |
//il faut lister les id de tous les respStmt
|
547 |
546 |
def respId = ["ctx1"];//,"TT1", "TnT1"];
|
548 |
|
|
|
547 |
|
549 |
548 |
//fait la correspondance entre le respId et le rapport d'execution de l'outil
|
550 |
549 |
def applications = new HashMap<String,HashMap<String,String>>();
|
551 |
550 |
applications.put("ctx1",new ArrayList<String>());
|
552 |
551 |
applications.get("ctx1").add("Oxygen");//app ident
|
553 |
552 |
applications.get("ctx1").add("9.3");//app version
|
554 |
553 |
applications.get("ctx1").add(null);//app report file path
|
555 |
|
|
|
554 |
|
556 |
555 |
//fait la correspondance entre le respId et les attributs type de la propriété ana du w txm
|
557 |
556 |
//pour construire les ref vers les taxonomies
|
558 |
557 |
def taxonomiesUtilisees = new HashMap<String,String[]>();
|
559 |
558 |
taxonomiesUtilisees.put("ctx1",["CATTEX2009"]);//,"lemma","lasla","grace"]);
|
560 |
|
|
|
559 |
|
561 |
560 |
//associe un id d'item avec sa description et son URI
|
562 |
561 |
def itemsURI = new HashMap<String,HashMap<String,String>>();
|
563 |
562 |
itemsURI.put("CATTEX2009",new HashMap<String,String>());
|
564 |
563 |
itemsURI.get("CATTEX2009").put("tagset","http://bfm.ens-lsh.fr/IMG/xml/cattex2009.xml");
|
565 |
564 |
itemsURI.get("CATTEX2009").put("website","http://bfm.ens-lsh.fr/article.php3?id_article=176");
|
566 |
|
|
|
565 |
|
567 |
566 |
//informations de respStmt
|
568 |
567 |
//resps (respId <voir ci-dessus>, [description, person, date])
|
569 |
568 |
def resps = new HashMap<String,String[]>();
|
... | ... | |
573 |
572 |
"2010-03-02",
|
574 |
573 |
"Tue Mar 2 21:02:55 Paris, Madrid 2010"
|
575 |
574 |
])
|
576 |
|
|
|
575 |
|
577 |
576 |
//lance le traitement
|
578 |
577 |
def builder = new Xml2Ana(new File(rootDir+"/src/",file));
|
579 |
578 |
builder.setCorrespondances(correspRef, correspType);
|
580 |
579 |
builder.setHeaderInfos(respId,resps, applications, taxonomiesUtilisees, itemsURI)
|
581 |
580 |
//dossier de sortie + nom fichier sortie
|
582 |
581 |
builder.process(anafile);
|
583 |
|
|
|
582 |
|
584 |
583 |
return
|
585 |
584 |
}
|
586 |
|
|
|
585 |
|
587 |
586 |
}
|