Révision 2988
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 2988) | ||
---|---|---|
220 | 220 |
|
221 | 221 |
String[] pAttributes = pargs |
222 | 222 |
|
223 |
String[] sAttributes = ["txmcorpus:0+lang", uAttr , textAttr, "event:0+id+desc+type+extent", sectionAttr, "sp:0+n+speaker+end+start+overlap+time"];
|
|
223 |
String[] sAttributes = ["txmcorpus:0+lang", uAttr , textAttr, "event:0+id+desc+type+extent", sectionAttr, "sp:0+n+who+end+start+overlap+time"];
|
|
224 | 224 |
|
225 | 225 |
println "pAttributes: $pAttributes" |
226 | 226 |
println "sAttributes: $sAttributes" |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/TRSToTEI.groovy (revision 2988) | ||
---|---|---|
204 | 204 |
|
205 | 205 |
writer.writeAttribute("start", time) |
206 | 206 |
writer.writeAttribute("end", parser.getAttributeValue(null, "endTime")) |
207 |
writeAttributes(); |
|
207 |
|
|
208 |
writer.writeAttribute("who", parser.getAttributeValue(null, "speaker")) |
|
209 |
|
|
210 |
for (int i = 0; i < parser.getAttributeCount(); i++) { // write other attributes if any |
|
211 |
String v = parser.getAttributeLocalName(i); |
|
212 |
if (!("who".equals(v)) && !("overlap".equals(v)) && !("time".equals(v)) && !("speaker".equals(v)) && !("endTime".equals(v)) && !("startTime".equals(v))) { |
|
213 |
writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), parser.getAttributeValue(i)); |
|
214 |
} |
|
215 |
} |
|
208 | 216 |
break; |
209 | 217 |
case "Sync": // >> u |
210 | 218 |
lastTime = parser.getAttributeValue(null, "time") |
tmp/org.txm.edition.rcp/src/org/txm/edition/rcp/handlers/SendEditionTo.java (revision 2988) | ||
---|---|---|
66 | 66 |
IEditionEditor editor = (IEditionEditor) SWTEditorsUtils.getActiveEditor(event); |
67 | 67 |
String query = null; |
68 | 68 |
if (editor instanceof SynopticEditionEditor) { |
69 |
SynopticEditionEditor seditor = (SynopticEditionEditor)editor; |
|
69 |
SynopticEditionEditor seditor = (SynopticEditionEditor) editor;
|
|
70 | 70 |
String[] wordids = seditor.getWordSelection(); |
71 | 71 |
if (wordids == null) { // last resort |
72 |
query = ((IEditionEditor)editor).getTextSelection(); |
|
73 |
} else { |
|
72 |
query = editor.getTextSelection(); |
|
73 |
} |
|
74 |
else { |
|
74 | 75 |
|
75 | 76 |
try { |
76 | 77 |
Edition edition = seditor.getEditionPanel(0).getEdition(); |
77 | 78 |
Text text = edition.getText(); |
78 | 79 |
Project project = text.getParent(); |
79 | 80 |
String textid = edition.getText().getName(); |
80 |
|
|
81 |
CQPCorpus corpus = (CQPCorpus) project.getCorpusBuild(null, MainCorpus.class); |
|
82 |
|
|
81 |
CQPCorpus corpus = seditor.getCorpus();// (CQPCorpus) project.getCorpusBuild(null, MainCorpus.class); |
|
82 |
if (corpus == null) { |
|
83 |
String ncorpus = edition.getStringParameterValue("corpus"); |
|
84 |
if (ncorpus != null && ncorpus.length() > 0) { |
|
85 |
corpus = (CQPCorpus) project.getCorpusBuild(ncorpus); |
|
86 |
} |
|
87 |
else { // last resort |
|
88 |
corpus = (CQPCorpus) project.getCorpusBuild(null, MainCorpus.class); |
|
89 |
} |
|
90 |
} |
|
83 | 91 |
Match m = null; |
84 | 92 |
Match n = null; |
85 | 93 |
|
... | ... | |
95 | 103 |
|
96 | 104 |
m = rez.getMatch(0); |
97 | 105 |
n = rez.getMatch(0); |
98 |
} else if (wordids[0] != null && wordids[1] != null) { |
|
99 |
CQLQuery cqlQuery = new CQLQuery(NLS.bind("[id=\"{0}\" & _.text_id=\"{1}\"]", wordids[0]+"|"+wordids[1], textid)); |
|
106 |
} |
|
107 |
else if (wordids[0] != null && wordids[1] != null) { |
|
108 |
CQLQuery cqlQuery = new CQLQuery(NLS.bind("[id=\"{0}\" & _.text_id=\"{1}\"]", wordids[0] + "|" + wordids[1], textid)); |
|
100 | 109 |
QueryResult rez = corpus.query(cqlQuery, "TMP", false); |
101 | 110 |
|
102 | 111 |
if (rez.getNMatch() != 2) { |
103 |
Log.warning(NLS.bind("Warning: found more than or no word for id={0} in {1}", wordids[0]+", "+wordids[1], textid));
|
|
112 |
Log.warning(NLS.bind("Warning: found more than or no word for id={0} in {1}", wordids[0] + ", " + wordids[1], textid));
|
|
104 | 113 |
return null; |
105 | 114 |
} |
106 | 115 |
|
... | ... | |
115 | 124 |
if (words != null && words.length > 0) { |
116 | 125 |
query = ""; |
117 | 126 |
for (String w : words) { |
118 |
query += " \""+CQLQuery.addBackSlash(w)+"\"";
|
|
127 |
query += " \"" + CQLQuery.addBackSlash(w) + "\"";
|
|
119 | 128 |
} |
120 | 129 |
query = query.substring(1); |
121 | 130 |
} |
122 | 131 |
} |
123 |
} catch (Exception e) { |
|
124 |
Log.warning("Error: "+e); |
|
132 |
} |
|
133 |
catch (Exception e) { |
|
134 |
Log.warning("Error: " + e); |
|
125 | 135 |
Log.printStackTrace(e); |
126 | 136 |
} |
127 | 137 |
} |
128 | 138 |
return query; |
129 |
} else { // try using text selection of the current editor |
|
130 |
query = ((IEditionEditor)editor).getTextSelection(); |
|
139 |
} |
|
140 |
else { // try using text selection of the current editor |
|
141 |
query = editor.getTextSelection(); |
|
131 | 142 |
query = query.replaceAll("\n", "").trim(); //$NON-NLS-1$ //$NON-NLS-2$ |
132 | 143 |
query = "\"" + CQLQuery.addBackSlash(query) + "\""; //$NON-NLS-1$ //$NON-NLS-2$ |
133 | 144 |
return query; |
tmp/org.txm.core/src/java/org/txm/importer/scripts/xmltxm/AnnotationInjection.groovy (revision 2988) | ||
---|---|---|
1 |
|
|
2 |
|
|
3 | 1 |
// Copyright © 2010-2013 ENS de Lyon. |
4 | 2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
5 | 3 |
// Lyon 2, University of Franche-Comté, University of Nice |
... | ... | |
49 | 47 |
*/ |
50 | 48 |
|
51 | 49 |
public class AnnotationInjection extends StaxIdentityParser { |
52 |
|
|
50 |
|
|
53 | 51 |
public static String TXMNS = "http://textometrie.org/1.0" |
54 |
|
|
52 |
|
|
55 | 53 |
/** The xml reader factory. */ |
56 | 54 |
private def factory; |
57 |
|
|
55 |
|
|
58 | 56 |
/** The links. */ |
59 | 57 |
private LinkedHashSet<String> links; |
58 |
|
|
60 | 59 |
/** |
61 | 60 |
* if set, existing values are replaced |
62 | 61 |
*/ |
63 | 62 |
boolean fixExistingValues |
64 |
|
|
63 |
|
|
65 | 64 |
/** The linkparsers. key=type*/ |
66 | 65 |
private LinkedHashMap<String, XMLStreamReader> linkparsers; |
67 |
|
|
66 |
|
|
68 | 67 |
/** The anaurl. */ |
69 | 68 |
private def anaurl; |
70 |
|
|
69 |
|
|
71 | 70 |
/** The anainput data. */ |
72 | 71 |
private def anainputData; |
73 |
|
|
72 |
|
|
74 | 73 |
/** The anafactory. */ |
75 | 74 |
private XMLInputFactory anafactory = XMLInputFactory.newInstance(); |
76 |
|
|
75 |
|
|
77 | 76 |
/** The anaparser. */ |
78 | 77 |
private XMLStreamReader anaparser; |
79 | 78 |
private XMLStreamReader headerparser; |
80 |
|
|
79 |
|
|
81 | 80 |
/** The resp stmt id. */ |
82 | 81 |
String respStmtID = ""; |
83 |
|
|
82 |
|
|
84 | 83 |
/** The present taxonomies. */ |
85 | 84 |
ArrayList<String> presentTaxonomies = new ArrayList(); |
86 |
|
|
85 |
|
|
87 | 86 |
/** |
88 | 87 |
* Instantiates a new annotation injection. |
89 | 88 |
* |
... | ... | |
93 | 92 |
public AnnotationInjection(URL url, URL anaurl) { |
94 | 93 |
this(url, anaurl, false) |
95 | 94 |
} |
96 |
|
|
95 |
|
|
97 | 96 |
/** |
98 | 97 |
* Instantiates a new annotation injection. |
99 | 98 |
* |
... | ... | |
113 | 112 |
System.out.println("IOException while parsing "); |
114 | 113 |
} |
115 | 114 |
} |
116 |
|
|
115 |
|
|
117 | 116 |
private void getHeaderInfos(String containertag, boolean captureTheTag) |
118 | 117 |
{ |
119 | 118 |
anainputData = new BufferedInputStream(anaurl.openStream()); |
... | ... | |
144 | 143 |
localname = headerparser.getLocalName(); |
145 | 144 |
if (!captureTheTag && localname == containertag) |
146 | 145 |
break;// stop looping |
147 |
|
|
146 |
|
|
148 | 147 |
if (start) |
149 | 148 |
writer.writeEndElement(); |
150 |
|
|
149 |
|
|
151 | 150 |
if (captureTheTag && localname == containertag) |
152 | 151 |
break;// stop looping |
153 | 152 |
} else if (event == XMLStreamConstants.CHARACTERS) { |
... | ... | |
161 | 160 |
headerparser.close(); |
162 | 161 |
anainputData.close(); |
163 | 162 |
} |
164 |
|
|
163 |
|
|
165 | 164 |
/** |
166 | 165 |
* find all refs. |
167 | 166 |
* |
... | ... | |
172 | 171 |
LinkedHashSet<String> links = new LinkedHashSet<String>(); |
173 | 172 |
anainputData = anaurl.openStream(); |
174 | 173 |
anaparser = anafactory.createXMLStreamReader(anainputData); |
175 |
|
|
174 |
|
|
176 | 175 |
for (int event = anaparser.next(); event != XMLStreamConstants.END_DOCUMENT; event = anaparser.next()) { |
177 | 176 |
if (event == XMLStreamConstants.START_ELEMENT) { |
178 | 177 |
if (anaparser.getLocalName().equals("linkGrp")) { |
179 |
String targetsvalue = anaparser.getAttributeValue(0) |
|
180 |
|
|
178 |
String targetsvalue = "txm:"+anaparser.getAttributeValue(0)
|
|
179 |
|
|
181 | 180 |
if (links.contains(targetsvalue)) { |
182 | 181 |
System.err.println("Warning: Multiple group declaration : "+targetsvalue+" has already been added, the first one will be used") |
183 | 182 |
} else { |
... | ... | |
188 | 187 |
} |
189 | 188 |
} |
190 | 189 |
} |
190 |
println links |
|
191 | 191 |
anaparser.close(); |
192 | 192 |
anainputData.close(); |
193 | 193 |
return links; |
194 | 194 |
} |
195 |
|
|
195 |
|
|
196 | 196 |
/** |
197 | 197 |
* Builds the link parsers. |
198 | 198 |
* I need to know what groups exists to build a parser per taxonomy and go to the first link element |
... | ... | |
202 | 202 |
// link group of the standoff file |
203 | 203 |
links = findGrpLink(); |
204 | 204 |
linkparsers = new LinkedHashMap<String, XMLStreamReader>(); |
205 |
|
|
205 |
|
|
206 | 206 |
// build one parser per link group |
207 | 207 |
for (String link : links) { // build a parser per group |
208 | 208 |
anainputData = new BufferedInputStream(anaurl.openStream()); |
209 | 209 |
linkparsers.put(link, anafactory.createXMLStreamReader(anainputData)); |
210 | 210 |
} |
211 |
|
|
211 |
|
|
212 | 212 |
//for each parser |
213 | 213 |
for (String link : links) { |
214 | 214 |
anaparser = linkparsers.get(link); |
215 | 215 |
for (int event = anaparser.next(); event != XMLStreamConstants.END_DOCUMENT; event = anaparser.next()) { |
216 | 216 |
if (event == XMLStreamConstants.START_ELEMENT) { |
217 | 217 |
if (anaparser.getLocalName().equals("linkGrp")) { // position the parser to the right group |
218 |
String targetsvalue = anaparser.getAttributeValue(0) |
|
218 |
String targetsvalue = "txm:"+anaparser.getAttributeValue(0) |
|
219 |
//println "target: "+targetsvalue+" link="+link |
|
219 | 220 |
if (targetsvalue.equals(link)) { |
220 | 221 |
break; // next element is a link start tag |
221 | 222 |
} |
... | ... | |
248 | 249 |
} |
249 | 250 |
} |
250 | 251 |
} |
251 |
|
|
252 |
|
|
252 | 253 |
/** |
253 | 254 |
* get the next tei:link value of a tei:LinkGrp. |
254 | 255 |
* |
... | ... | |
256 | 257 |
* @return the next ana |
257 | 258 |
*/ |
258 | 259 |
private String getNextAnaValue(String link, String wordId) { |
260 |
//println "GET ANAPARSER of link=$link linkparsers="+linkparsers.keySet() |
|
259 | 261 |
anaparser = linkparsers.get(link); |
262 |
if (anaparser == null) return null; |
|
260 | 263 |
def m; |
261 | 264 |
for (int event = anaparser.next(); event != XMLStreamConstants.END_DOCUMENT; event = anaparser.next()) { |
262 | 265 |
if (event == XMLStreamConstants.START_ELEMENT) { |
... | ... | |
265 | 268 |
if ((m = targetsvalue =~ /#(.*) #(.*)/)) { // balise externe |
266 | 269 |
def g1 = m[0][1]; |
267 | 270 |
def g2 = m[0][2]; |
268 |
|
|
271 |
|
|
269 | 272 |
String anavalue = g2; |
270 | 273 |
anavalue = anavalue.replace("<", "&lt;") |
271 | 274 |
return anavalue; |
... | ... | |
277 | 280 |
} |
278 | 281 |
return ""; |
279 | 282 |
} |
280 |
|
|
283 |
|
|
281 | 284 |
/** |
282 | 285 |
* build the ana tags of a word. |
283 | 286 |
* |
284 | 287 |
* @param wordId the word id |
285 | 288 |
* @return the ana tag |
286 | 289 |
*/ |
287 |
private void writeAnaTags(String wordId) |
|
288 |
{ |
|
290 |
private void writeAnaTags(String wordId) {
|
|
291 |
|
|
289 | 292 |
String anabalises ="\n"; |
290 | 293 |
for (String link : links) { |
291 |
writer.writeStartElement(TXMNS, "ana"); |
|
292 |
writer.writeAttribute("resp", "#"+respStmtID); |
|
293 |
writer.writeAttribute("type", "#"+link); |
|
294 | 294 |
|
295 |
int idx = link.indexOf(":") |
|
296 |
|
|
297 |
String resp = link.substring(0, idx) |
|
298 |
String type = link.substring(idx+1) |
|
299 |
|
|
300 |
|
|
295 | 301 |
String newValue = getNextAnaValue(link, wordId) |
296 |
if (fixExistingValues || anaValues.containsKey(link) == null) { |
|
297 |
anaValues.put(link, newValue) |
|
298 |
} else { |
|
299 |
anaValues.put(link, "") |
|
302 |
//println "ANA="+anaValues.get(link)+" new=$newValue" |
|
303 |
if (newValue!= null) { |
|
304 |
if (anaValues.get(link) == null || fixExistingValues) { |
|
305 |
anaValues.put(link, newValue) |
|
306 |
} |
|
300 | 307 |
} |
301 | 308 |
|
302 |
writer.writeCharacters(anaValues.get(link)); |
|
303 |
|
|
304 |
writer.writeEndElement(); // txm:ana |
|
309 |
if (anaValues.get(link) != null) { // there was no value in SRC XML and in injected values |
|
310 |
writer.writeStartElement(TXMNS, "ana"); |
|
311 |
writer.writeAttribute("resp", "#"+resp); |
|
312 |
writer.writeAttribute("type", "#"+type); |
|
313 |
writer.writeCharacters(anaValues.get(link)); |
|
314 |
writer.writeEndElement(); // txm:ana |
|
315 |
} |
|
305 | 316 |
} |
317 |
anaValues.clear() |
|
306 | 318 |
} |
307 |
|
|
319 |
|
|
308 | 320 |
String wordId; |
309 | 321 |
HashMap<String, String> anaValues = new HashMap<String, String>(); |
310 | 322 |
boolean flagSourceDesc = false, flagW = false, flagAna = false; |
... | ... | |
339 | 351 |
flagW = true |
340 | 352 |
anaValues.clear() |
341 | 353 |
} |
342 |
|
|
354 |
|
|
343 | 355 |
super.processStartElement(); |
344 | 356 |
} |
345 |
|
|
357 |
|
|
346 | 358 |
protected void processCharacters() { |
347 | 359 |
if (flagAna) anaValue += parser.getText(); |
348 | 360 |
else super.processCharacters(); // FORM CONTENT LOST !!!!!!!!!!!!! |
349 | 361 |
} |
350 |
|
|
362 |
|
|
351 | 363 |
boolean applicationWritten = false; |
352 | 364 |
boolean taxonomiesWritten = false; |
353 | 365 |
protected void processEndElement() { |
... | ... | |
357 | 369 |
flagW = false |
358 | 370 |
break; |
359 | 371 |
case "ana": |
360 |
if (flagAna && replace && type != null && resp != null && anaValue != null) {
|
|
361 |
anaValues.put(type, anaValue) |
|
362 |
links.add(type) |
|
372 |
if (flagAna && type != null && resp != null && anaValue != null) { |
|
373 |
anaValues.put(resp+":"+type, anaValue)
|
|
374 |
links.add(resp+":"+type)
|
|
363 | 375 |
flagAna = false |
364 | 376 |
return; // don't write the "ana" end element |
365 | 377 |
} |
366 | 378 |
flagAna = false |
367 | 379 |
break; |
368 |
|
|
380 |
|
|
369 | 381 |
case "appInfo": |
370 | 382 |
applicationWritten = true; |
371 | 383 |
getHeaderInfos("appInfo", false); |
372 | 384 |
break; |
373 |
|
|
385 |
|
|
374 | 386 |
case "classDecl": |
375 | 387 |
taxonomiesWritten = true; |
376 | 388 |
getHeaderInfos("classDecl", false); |
377 | 389 |
break; |
378 |
|
|
390 |
|
|
379 | 391 |
case "encodingDesc": |
380 | 392 |
if (!applicationWritten) { |
381 | 393 |
writer.writeStartElement("appInfo"); |
... | ... | |
388 | 400 |
writer.writeEndElement(); // classDecl |
389 | 401 |
} |
390 | 402 |
break; |
391 |
|
|
403 |
|
|
392 | 404 |
case "titleStmt": |
393 | 405 |
if (flagSourceDesc) { |
394 | 406 |
//output.write(this.respStmt+"\n") |
... | ... | |
400 | 412 |
} |
401 | 413 |
super.processEndElement(); |
402 | 414 |
} |
403 |
|
|
415 |
|
|
404 | 416 |
/** The declarenamespace. */ |
405 | 417 |
boolean declarenamespace = false; |
406 |
|
|
418 |
|
|
407 | 419 |
/** |
408 | 420 |
* Declare namespace. |
409 | 421 |
* |
... | ... | |
416 | 428 |
declarenamespace = true; |
417 | 429 |
} |
418 | 430 |
} |
419 |
|
|
431 |
|
|
420 | 432 |
/** |
421 | 433 |
* The main method. |
422 | 434 |
* |
423 | 435 |
* @param args the arguments |
424 | 436 |
*/ |
425 | 437 |
public static void main(String[] args) { |
426 |
|
|
438 |
|
|
427 | 439 |
String rootDir = "~/xml/rgaqcj/"; |
428 | 440 |
new File(rootDir + "/injection/").mkdir(); |
429 |
|
|
430 |
def milestones = ["tagUsage", "pb", "lb","catRef"]// the tags who |
|
431 |
|
|
441 |
|
|
442 |
def milestones = [ |
|
443 |
"tagUsage", |
|
444 |
"pb", |
|
445 |
"lb", |
|
446 |
"catRef"]// the tags who |
|
432 | 447 |
File srcfile = new File(rootDir, "/anainline/", "roland.xml"); |
433 | 448 |
File pos1file = new File(rootDir, "/pos/", "rolandTT1-w-ana.xml"); |
434 |
|
|
449 |
|
|
435 | 450 |
File src2file = new File(rootDir, "/injection/", "roland.xml"); |
436 | 451 |
File pos2file = new File(rootDir, "/pos/", "rolandTT2-w-ana.xml"); |
437 |
|
|
452 |
|
|
438 | 453 |
println("process file : " + srcfile + " with : " + pos1file); |
439 | 454 |
def builder = new AnnotationInjection(srcfile.toURI().toURL(), |
440 | 455 |
pos1file.toURI().toURL(), milestones); |
441 | 456 |
builder.transfomFile(new File(rootDir + "/injection/", "roland.xml")); |
442 |
|
|
457 |
|
|
443 | 458 |
println("process file : " + src2file + " with : " + pos1file); |
444 | 459 |
builder = new AnnotationInjection(src2file.toURI().toURL(), pos2file.toURI().toURL(), |
445 | 460 |
milestones); |
446 | 461 |
builder.transfomFile(rootDir + "/injection/", "roland-FINAL.xml"); |
447 |
|
|
462 |
|
|
448 | 463 |
return; |
449 | 464 |
} |
450 | 465 |
} |
tmp/org.txm.core/src/java/org/txm/importer/scripts/xmltxm/Xml2Ana.groovy (revision 2988) | ||
---|---|---|
2 | 2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
3 | 3 |
// Lyon 2, University of Franche-Comté, University of Nice |
4 | 4 |
// Sophia Antipolis, University of Paris 3. |
5 |
//
|
|
5 |
// |
|
6 | 6 |
// The TXM platform is free software: you can redistribute it |
7 | 7 |
// and/or modify it under the terms of the GNU General Public |
8 | 8 |
// License as published by the Free Software Foundation, |
9 | 9 |
// either version 2 of the License, or (at your option) any |
10 | 10 |
// later version. |
11 |
//
|
|
11 |
// |
|
12 | 12 |
// The TXM platform is distributed in the hope that it will be |
13 | 13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
14 | 14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
15 | 15 |
// PURPOSE. See the GNU General Public License for more |
16 | 16 |
// details. |
17 |
//
|
|
17 |
// |
|
18 | 18 |
// You should have received a copy of the GNU General |
19 | 19 |
// Public License along with the TXM platform. If not, see |
20 | 20 |
// http://www.gnu.org/licenses. |
... | ... | |
58 | 58 |
{ |
59 | 59 |
/** The dir. */ |
60 | 60 |
private def dir; |
61 |
|
|
61 |
|
|
62 | 62 |
/** The convert all attributes. */ |
63 | 63 |
private boolean convertAllAttributes = false; |
64 |
|
|
64 |
|
|
65 | 65 |
/** The corresp type. */ |
66 | 66 |
HashMap<String,String> correspType; |
67 |
|
|
67 |
|
|
68 | 68 |
/** The corresp ref. */ |
69 | 69 |
HashMap<String,String> correspRef; |
70 |
|
|
70 |
|
|
71 | 71 |
/** The check tags. */ |
72 | 72 |
HashMap<String,Boolean> checkTags = new HashMap<String,Boolean>(); |
73 |
|
|
73 |
|
|
74 | 74 |
/** The resp id. */ |
75 | 75 |
def respId = []; |
76 |
|
|
76 |
|
|
77 | 77 |
/** The applications. */ |
78 | 78 |
HashMap<String,File> applications; |
79 |
|
|
79 |
|
|
80 | 80 |
/** The taxonomies. */ |
81 | 81 |
HashMap<String,String[]> taxonomies; |
82 |
|
|
82 |
|
|
83 | 83 |
/** The resps. */ |
84 | 84 |
HashMap<String,String[]> resps; |
85 |
|
|
85 |
|
|
86 | 86 |
/** The items. */ |
87 | 87 |
HashMap<String,HashMap<String,String>> items; |
88 |
|
|
88 |
|
|
89 | 89 |
/** The XML headeradded. */ |
90 | 90 |
boolean XMLHeaderadded = false; |
91 | 91 |
String textname; |
... | ... | |
93 | 93 |
|
94 | 94 |
public static final String TEXT = "text" |
95 | 95 |
public static final String ID = "id" |
96 |
|
|
96 |
|
|
97 | 97 |
/** |
98 | 98 |
* Instantiates a new xml2 ana. |
99 | 99 |
* |
... | ... | |
107 | 107 |
int idx = textname.lastIndexOf("."); |
108 | 108 |
if (idx > 0) |
109 | 109 |
textname = textname.substring(0, idx) |
110 |
|
|
111 | 110 |
|
111 |
|
|
112 | 112 |
checkTags.put("respStmt",false); |
113 | 113 |
checkTags.put("titleStmt",false); |
114 | 114 |
checkTags.put("appInfo",false); |
115 |
|
|
115 |
|
|
116 | 116 |
hasText = new HasElement(file, TEXT).process(); |
117 | 117 |
} |
118 |
|
|
118 |
|
|
119 | 119 |
/** |
120 | 120 |
* Sets the convert all atrtibutes. |
121 | 121 |
* |
... | ... | |
135 | 135 |
public setWordTag(String wtag) { |
136 | 136 |
this.wtag = wtag |
137 | 137 |
} |
138 |
|
|
138 |
|
|
139 | 139 |
int idcount = 0; |
140 | 140 |
boolean flagWord = false; |
141 | 141 |
int firstElement = 0; |
... | ... | |
146 | 146 |
def anabalises = []; |
147 | 147 |
protected void processStartElement() |
148 | 148 |
{ |
149 |
// println "checkTags=$checkTags"; |
|
150 |
// println "parser=$parser"; |
|
149 |
// println "checkTags=$checkTags";
|
|
150 |
// println "parser=$parser";
|
|
151 | 151 |
firstElement++; |
152 | 152 |
|
153 | 153 |
if (this.checkTags.containsKey(parser.getLocalName())) { |
... | ... | |
164 | 164 |
} |
165 | 165 |
idcount++; // increment word counter |
166 | 166 |
anabalises.clear(); |
167 |
|
|
167 |
|
|
168 | 168 |
writer.writeStartElement(parser.getLocalName()); // write w |
169 |
|
|
169 |
|
|
170 | 170 |
for (int i = 0 ; i < parser.getNamespaceCount() ; i++) // write namespaces |
171 | 171 |
writer.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i)); |
172 |
|
|
172 |
|
|
173 | 173 |
for (int i= 0 ; i < parser.getAttributeCount() ;i++ ) { // transform attributes |
174 | 174 |
String type = parser.getAttributeLocalName(i); |
175 | 175 |
String value = parser.getAttributeValue(i); |
176 | 176 |
if (correspType.containsKey(type)) { // check if txm:ana |
177 | 177 |
String corresptype = correspType.get(type); |
178 | 178 |
String ref = correspRef.get(type); |
179 |
anabalises.add(["#"+ref, "#"+corresptype, value]); |
|
180 |
} else if (type == ID) { // keep id attribute |
|
179 |
anabalises.add([ |
|
180 |
"#"+ref, |
|
181 |
"#"+corresptype, |
|
182 |
value |
|
183 |
]); |
|
184 |
} else if (type == ID) { // keep id attribute |
|
181 | 185 |
String wordid = value |
182 | 186 |
if (wordid.startsWith("w")) { |
183 | 187 |
if (!wordid.startsWith("w_")) |
184 | 188 |
wordid = "w_"+wordid.substring(1) |
185 |
}
|
|
186 |
// else { |
|
187 |
// wordid = "w_"+textname+"_"+wordid; |
|
188 |
// } |
|
189 |
} |
|
190 |
// else {
|
|
191 |
// wordid = "w_"+textname+"_"+wordid;
|
|
192 |
// }
|
|
189 | 193 |
|
190 | 194 |
wordid = AsciiUtils.buildWordId(wordid); // remove characters not compatible with the id attribute value |
191 | 195 |
|
... | ... | |
193 | 197 |
|
194 | 198 |
} else { // add attributes that was in the original <w> |
195 | 199 |
if (convertAllAttributes) |
196 |
anabalises.add(["none","#"+type, value])
|
|
200 |
anabalises.add(["#src", "#"+type, value])
|
|
197 | 201 |
else |
198 | 202 |
writer.writeAttribute(type, value); |
199 | 203 |
} |
200 | 204 |
} |
201 |
|
|
205 |
|
|
202 | 206 |
flagWord = true; // start to capture the form |
203 | 207 |
writer.writeStartElement(TXMNS, "form"); |
204 | 208 |
break; |
205 |
|
|
209 |
|
|
206 | 210 |
case "TEI": |
207 | 211 |
super.processStartElement(); |
208 | 212 |
boolean hasTeiNS = false; |
... | ... | |
219 | 223 |
if (!hasTXMNs) |
220 | 224 |
writer.writeNamespace(TXM, TXMNS); |
221 | 225 |
break; |
222 |
|
|
226 |
|
|
223 | 227 |
default: |
224 |
|
|
228 |
|
|
225 | 229 |
if (TEXT.equals(localname)) { |
226 | 230 |
hasText = true; |
227 | 231 |
} |
228 |
|
|
232 |
|
|
229 | 233 |
if (firstElement == 1) { // test if first element is TEI |
230 | 234 |
//println "first tag: "+parser.getLocalName() |
231 | 235 |
if (localname != "TEI") { // "TEI" is missing |
... | ... | |
251 | 255 |
textElementAdded = true; |
252 | 256 |
hasText = true; |
253 | 257 |
} |
254 |
|
|
258 |
|
|
255 | 259 |
super.processStartElement(); |
256 | 260 |
if (TEXT.equals(localname)) { |
257 | 261 |
if (!parser.getAttributeValue(null, ID)) { |
... | ... | |
260 | 264 |
} |
261 | 265 |
} |
262 | 266 |
} |
263 |
|
|
267 |
|
|
264 | 268 |
protected void after() |
265 | 269 |
{ |
266 | 270 |
if (textElementAdded) { |
... | ... | |
271 | 275 |
} |
272 | 276 |
super.after(); // close writer, parser, etc |
273 | 277 |
} |
274 |
|
|
278 |
|
|
275 | 279 |
protected void addTEIElement() |
276 | 280 |
{ |
277 | 281 |
writer.writeStartElement("TEI"); |
... | ... | |
280 | 284 |
writer.writeNamespace(TEI, TEINS); |
281 | 285 |
writeTeiHeader(); |
282 | 286 |
} |
283 |
|
|
287 |
|
|
284 | 288 |
protected void processCharacters() |
285 | 289 |
{ |
286 | 290 |
if (flagWord) { |
... | ... | |
311 | 315 |
writer.writeCharacters(values[2]); |
312 | 316 |
writer.writeEndElement(); // txm:ana |
313 | 317 |
} |
314 |
|
|
318 |
|
|
315 | 319 |
flagWord = false; |
316 | 320 |
break; |
317 |
|
|
321 |
|
|
318 | 322 |
case "fileDesc": |
319 | 323 |
hasFileDesc = true; |
320 | 324 |
this.writeTXMResps(); |
321 | 325 |
break; |
322 |
|
|
326 |
|
|
323 | 327 |
case "classDecl": |
324 | 328 |
hasClassDecl=true; |
325 | 329 |
this.writeTXMTaxonomies(); |
... | ... | |
328 | 332 |
hasEncodingDesc = true; |
329 | 333 |
writeContentOfEncodingDesc(); |
330 | 334 |
break; |
331 |
|
|
335 |
|
|
332 | 336 |
case "teiHeader": |
333 | 337 |
hasTeiHeader = true |
334 | 338 |
if (!hasEncodingDesc) { |
... | ... | |
336 | 340 |
writeContentOfEncodingDesc(); |
337 | 341 |
writer.writeEndElement(); |
338 | 342 |
} |
339 |
|
|
343 |
|
|
340 | 344 |
break; |
341 | 345 |
case "TEI": |
342 | 346 |
hasTEI = true; |
... | ... | |
345 | 349 |
} |
346 | 350 |
break; |
347 | 351 |
} |
348 |
|
|
352 |
|
|
349 | 353 |
super.processEndElement(); |
350 | 354 |
} |
351 |
|
|
355 |
|
|
352 | 356 |
protected void writeTeiHeader() |
353 | 357 |
{ |
354 | 358 |
writer.writeStartElement("teiHeader"); |
... | ... | |
368 | 372 |
writer.writeEndElement(); // encodingDesc |
369 | 373 |
writer.writeEndElement(); // teiHeader |
370 | 374 |
} |
371 |
|
|
375 |
|
|
372 | 376 |
protected void writeContentOfEncodingDesc() |
373 | 377 |
{ |
374 | 378 |
writer.writeStartElement("appInfo") |
... | ... | |
380 | 384 |
writer.writeEndElement(); // classDecl |
381 | 385 |
} |
382 | 386 |
} |
383 |
|
|
387 |
|
|
384 | 388 |
/** |
385 | 389 |
* Check resp. |
386 | 390 |
* |
... | ... | |
393 | 397 |
rez += "\t"+key+"\n"; |
394 | 398 |
return rez; |
395 | 399 |
} |
396 |
|
|
400 |
|
|
397 | 401 |
/** |
398 | 402 |
* Sets the correspondances. |
399 | 403 |
* |
... | ... | |
405 | 409 |
this.correspRef = correspRef; |
406 | 410 |
this.correspType = correspType; |
407 | 411 |
} |
408 |
|
|
412 |
|
|
409 | 413 |
/** |
410 | 414 |
* Sets the header infos. |
411 | 415 |
* |
... | ... | |
423 | 427 |
this.taxonomies = taxonomies; |
424 | 428 |
this.items = items; |
425 | 429 |
} |
426 |
|
|
430 |
|
|
427 | 431 |
/** |
428 | 432 |
* Write txm resps. |
429 | 433 |
*/ |
... | ... | |
447 | 451 |
writer.writeEndElement(); //respStmt |
448 | 452 |
} |
449 | 453 |
} |
450 |
|
|
454 |
|
|
451 | 455 |
/** |
452 | 456 |
* Write txm apps. |
453 | 457 |
*/ |
... | ... | |
458 | 462 |
String ident = list.get(0); |
459 | 463 |
String version = list.get(1); |
460 | 464 |
File report = list.get(2); |
461 |
|
|
465 |
|
|
462 | 466 |
writer.writeStartElement(TXMNS, "application"); |
463 | 467 |
writer.writeAttribute("ident", ident); |
464 | 468 |
writer.writeAttribute("version", version); |
465 | 469 |
writer.writeAttribute(RESP, ref); |
466 |
|
|
470 |
|
|
467 | 471 |
//get txm:commandLine from GeneratedReport |
468 | 472 |
if (report != null) { |
469 | 473 |
writer.writeCharacters("");writer.flush(); |
... | ... | |
476 | 480 |
} |
477 | 481 |
reader.close(); |
478 | 482 |
} |
479 |
|
|
483 |
|
|
480 | 484 |
writer.writeStartElement("ab"); |
481 | 485 |
writer.writeAttribute(TYPE, "annotation"); |
482 | 486 |
for (String item : taxonomies.get(ref)) { |
... | ... | |
490 | 494 |
writer.writeEndElement(); // txm:application |
491 | 495 |
} |
492 | 496 |
} |
493 |
|
|
497 |
|
|
494 | 498 |
/** |
495 | 499 |
* Write txm taxonomies. |
496 | 500 |
*/ |
... | ... | |
499 | 503 |
for (String tax : items.keySet()) { |
500 | 504 |
writer.writeStartElement("taxonomy"); |
501 | 505 |
writer.writeAttribute(ID, tax); |
502 |
|
|
506 |
|
|
503 | 507 |
writer.writeStartElement("bibl"); |
504 | 508 |
writer.writeAttribute(TYPE, "tagset"); |
505 | 509 |
writer.writeStartElement("title"); |
506 | 510 |
writer.writeCharacters(tax); |
507 | 511 |
writer.writeEndElement(); // title |
508 |
|
|
512 |
|
|
509 | 513 |
for (String type : items.get(tax).keySet()) { |
510 | 514 |
writer.writeEmptyElement("ref"); |
511 | 515 |
writer.writeAttribute(TYPE, type); |
... | ... | |
515 | 519 |
writer.writeEndElement(); // taxonomy |
516 | 520 |
} |
517 | 521 |
} |
518 |
|
|
522 |
|
|
519 | 523 |
/** |
520 | 524 |
* The main method. |
521 | 525 |
* |
522 | 526 |
* @param args the arguments |
523 | 527 |
*/ |
524 | 528 |
public static void main(String[] args) { |
525 |
|
|
529 |
|
|
526 | 530 |
String rootDir = "~/xml/rgaqcj/"; |
527 | 531 |
new File(rootDir+"anainline/").mkdir(); |
528 |
|
|
532 |
|
|
529 | 533 |
ArrayList<String> milestones = new ArrayList<String>(); |
530 |
|
|
534 |
|
|
531 | 535 |
String file = "roland-p5.xml"; |
532 | 536 |
String anafile = "roland-p5.xml"; |
533 |
|
|
537 |
|
|
534 | 538 |
def correspType = new HashMap<String,String>() |
535 | 539 |
// correspType(attribut word wlx, attribut type de la propriété ana du w txm) |
536 | 540 |
correspType.put("p2","CATTEX2009"); |
537 |
|
|
541 |
|
|
538 | 542 |
def correspRef = new HashMap<String,String>() |
539 | 543 |
// correspRef (attribut word wlx, attribut ref de la propriété ana du w txm. ref pointe vers l'identifiant du respStmt du TEIheader) |
540 | 544 |
correspRef.put("p2","ctx1"); |
541 |
|
|
545 |
|
|
542 | 546 |
//il faut lister les id de tous les respStmt |
543 | 547 |
def respId = ["ctx1"];//,"TT1", "TnT1"]; |
544 |
|
|
548 |
|
|
545 | 549 |
//fait la correspondance entre le respId et le rapport d'execution de l'outil |
546 | 550 |
def applications = new HashMap<String,HashMap<String,String>>(); |
547 | 551 |
applications.put("ctx1",new ArrayList<String>()); |
548 | 552 |
applications.get("ctx1").add("Oxygen");//app ident |
549 | 553 |
applications.get("ctx1").add("9.3");//app version |
550 | 554 |
applications.get("ctx1").add(null);//app report file path |
551 |
|
|
555 |
|
|
552 | 556 |
//fait la correspondance entre le respId et les attributs type de la propriété ana du w txm |
553 | 557 |
//pour construire les ref vers les taxonomies |
554 | 558 |
def taxonomiesUtilisees = new HashMap<String,String[]>(); |
555 | 559 |
taxonomiesUtilisees.put("ctx1",["CATTEX2009"]);//,"lemma","lasla","grace"]); |
556 |
|
|
560 |
|
|
557 | 561 |
//associe un id d'item avec sa description et son URI |
558 | 562 |
def itemsURI = new HashMap<String,HashMap<String,String>>(); |
559 | 563 |
itemsURI.put("CATTEX2009",new HashMap<String,String>()); |
560 | 564 |
itemsURI.get("CATTEX2009").put("tagset","http://bfm.ens-lsh.fr/IMG/xml/cattex2009.xml"); |
561 | 565 |
itemsURI.get("CATTEX2009").put("website","http://bfm.ens-lsh.fr/article.php3?id_article=176"); |
562 |
|
|
566 |
|
|
563 | 567 |
//informations de respStmt |
564 | 568 |
//resps (respId <voir ci-dessus>, [description, person, date]) |
565 | 569 |
def resps = new HashMap<String,String[]>(); |
566 |
resps.put("ctx1", ["initial tagging","alavrentiev","2010-03-02","Tue Mar 2 21:02:55 Paris, Madrid 2010"]) |
|
567 |
|
|
570 |
resps.put("ctx1", [ |
|
571 |
"initial tagging", |
|
572 |
"alavrentiev", |
|
573 |
"2010-03-02", |
|
574 |
"Tue Mar 2 21:02:55 Paris, Madrid 2010" |
|
575 |
]) |
|
576 |
|
|
568 | 577 |
//lance le traitement |
569 | 578 |
def builder = new Xml2Ana(new File(rootDir+"/src/",file)); |
570 | 579 |
builder.setCorrespondances(correspRef, correspType); |
571 | 580 |
builder.setHeaderInfos(respId,resps, applications, taxonomiesUtilisees, itemsURI) |
572 | 581 |
//dossier de sortie + nom fichier sortie |
573 | 582 |
builder.process(anafile); |
574 |
|
|
583 |
|
|
575 | 584 |
return |
576 | 585 |
} |
577 |
|
|
586 |
|
|
578 | 587 |
} |
tmp/org.txm.links.rcp/src/org/txm/links/rcp/handlers/SendSelectionToQueryable.java (revision 2988) | ||
---|---|---|
75 | 75 |
String query = this.createQuery(event, selection); |
76 | 76 |
String queries = this.createQueries(event, selection); |
77 | 77 |
|
78 |
if (query.isEmpty() && queries.isEmpty()) {
|
|
78 |
if ((query == null || query.isEmpty()) && (queries == null || queries.isEmpty())) {
|
|
79 | 79 |
Log.warning(Messages.noQueryWasSetCommandCanceled); |
80 | 80 |
return null; |
81 | 81 |
} |
Formats disponibles : Unified diff