Révision 344
tmp/org.txm.annotation.core/.settings/org.eclipse.jdt.core.prefs (revision 344) | ||
---|---|---|
1 |
eclipse.preferences.version=1 |
|
2 |
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled |
|
3 |
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 |
|
4 |
org.eclipse.jdt.core.compiler.compliance=1.6 |
|
5 |
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error |
|
6 |
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error |
|
7 |
org.eclipse.jdt.core.compiler.source=1.6 |
|
0 | 8 |
tmp/org.txm.annotation.core/.classpath (revision 344) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<classpath> |
|
3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/> |
|
5 |
<classpathentry kind="src" path="src"/> |
|
6 |
<classpathentry kind="output" path="bin"/> |
|
7 |
</classpath> |
|
0 | 8 |
tmp/org.txm.annotation.core/META-INF/MANIFEST.MF (revision 344) | ||
---|---|---|
1 |
Manifest-Version: 1.0 |
|
2 |
Bundle-ManifestVersion: 2 |
|
3 |
Bundle-Name: Annotation |
|
4 |
Bundle-SymbolicName: org.txm.annotation.core |
|
5 |
Bundle-Version: 1.0.0.qualifier |
|
6 |
Bundle-Activator: org.txm.annotation.Activator |
|
7 |
Require-Bundle: org.eclipse.ui, |
|
8 |
org.eclipse.core.runtime, |
|
9 |
org.txm.core;bundle-version="0.7.0" |
|
10 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.6 |
|
11 |
Bundle-ActivationPolicy: lazy |
|
12 |
Export-Package: org.txm.annotation, |
|
13 |
org.txm.annotation.conversion, |
|
14 |
org.txm.annotation.repository, |
|
15 |
org.txm.annotation.storage.temporary |
|
0 | 16 |
tmp/org.txm.annotation.core/.project (revision 344) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<projectDescription> |
|
3 |
<name>org.txm.annotation.core</name> |
|
4 |
<comment></comment> |
|
5 |
<projects> |
|
6 |
</projects> |
|
7 |
<buildSpec> |
|
8 |
<buildCommand> |
|
9 |
<name>org.eclipse.jdt.core.javabuilder</name> |
|
10 |
<arguments> |
|
11 |
</arguments> |
|
12 |
</buildCommand> |
|
13 |
<buildCommand> |
|
14 |
<name>org.eclipse.pde.ManifestBuilder</name> |
|
15 |
<arguments> |
|
16 |
</arguments> |
|
17 |
</buildCommand> |
|
18 |
<buildCommand> |
|
19 |
<name>org.eclipse.pde.SchemaBuilder</name> |
|
20 |
<arguments> |
|
21 |
</arguments> |
|
22 |
</buildCommand> |
|
23 |
</buildSpec> |
|
24 |
<natures> |
|
25 |
<nature>org.eclipse.pde.PluginNature</nature> |
|
26 |
<nature>org.eclipse.jdt.core.javanature</nature> |
|
27 |
</natures> |
|
28 |
</projectDescription> |
|
0 | 29 |
tmp/org.txm.annotation.core/src/org/txm/annotation/AnnotationSyMoGIHWriter.java (revision 344) | ||
---|---|---|
1 |
package org.txm.annotation; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.FileWriter; |
|
5 |
import java.io.IOException; |
|
6 |
import java.net.MalformedURLException; |
|
7 |
import java.util.ArrayList; |
|
8 |
import java.util.HashMap; |
|
9 |
import java.util.HashSet; |
|
10 |
import java.util.List; |
|
11 |
|
|
12 |
import javax.xml.stream.XMLInputFactory; |
|
13 |
import javax.xml.stream.XMLOutputFactory; |
|
14 |
import javax.xml.stream.XMLStreamException; |
|
15 |
import javax.xml.stream.XMLStreamWriter; |
|
16 |
|
|
17 |
import org.txm.annotation.repository.AnnotationType; |
|
18 |
import org.txm.importer.StaxIdentityParser; |
|
19 |
|
|
20 |
/**
 * The Class AnnotationSyMoGIHWriter.
 *
 * @author sgedzelman, mdecorde
 *
 * Copies an XML-TXM file without its annotation elements
 * and creates one XML-TEI-SyMoGIH annotation file per annotation author.
 *
 */
|
29 |
public class AnnotationSyMoGIHWriter extends StaxIdentityParser { |
|
30 |
|
|
31 |
/** Directory in which the per-author stand-off files are created. */
File xmlStandOffDirectory;
/** Debug flag received from the constructor. */
boolean debug = false;

// Attributes ("ref", "author", "date", "start", "end") of the annotation
// element most recently opened; set by processStartElement().
String currentRef ;
String currentAuthor ;
String currentDate;
String currentStartPos ;
String currentEndPos ;
//read xmlFile, to find annotations and update/write to xmlstandofffile
//order annotations by annotator
////// order annotations by date
/** Ids of the annotation types whose txm elements are removed from the copy. */
HashSet<String> types = new HashSet<String>();
/** Word ids collected for the annotation currently being read. */
ArrayList<String> positions;
/** Annotation type -> word ids of the annotation of that type currently open. */
HashMap<String, ArrayList<String>> annotationsPositions;
/** Writer of the stand-off file currently being written. */
XMLStreamWriter currentWriter;
/** Not referenced by the visible code of this class. */
XMLStreamWriter standoffWriter;
/** Local name of the annotation element most recently opened. */
String currentType;
/** True while the parser is inside an annotation element. */
boolean startAnnotation = false;
/** Name of the source XML file; used to build the stand-off file names. */
private String filename;

// author -> date -> annotation_values
/** Each stored value is {date, ref, type}; see storeAnnotation(). */
HashMap<String, HashMap<String, ArrayList<String[]>>> allannotations = new HashMap<String, HashMap<String, ArrayList<String[]>>>();
/** author -> date -> word-id lists, read in parallel (same index) with allannotations. */
HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>> allannotationspositions = new HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>>();
/** Text identifier written as the title of each stand-off file. */
private String textid;
|
55 |
|
|
56 |
/**
 * Prepares the copy of an XML-TXM file and the extraction of its annotations.
 *
 * @param textid identifier of the text, written as title of the stand-off files
 * @param xmlFile the XML-TXM file to read
 * @param xmlStandOffDirectory directory receiving the stand-off files
 * @param types annotation types to extract; only their ids are kept
 * @param debug debug flag
 * @throws IOException
 * @throws XMLStreamException
 */
public AnnotationSyMoGIHWriter(String textid, File xmlFile, File xmlStandOffDirectory, List<AnnotationType> types, boolean debug) throws IOException, XMLStreamException {
	super(xmlFile.toURI().toURL()); // init reader and writer

	this.xmlStandOffDirectory = xmlStandOffDirectory;
	this.debug = debug;
	this.filename = xmlFile.getName();
	this.textid = textid;

	factory = XMLInputFactory.newInstance();
	annotationsPositions = new HashMap<String, ArrayList<String>>();

	// only the type ids are needed to recognise the annotation elements
	for (int i = 0 ; i < types.size() ; i++) {
		this.types.add(types.get(i).getId());
	}
}
|
81 |
|
|
82 |
/*<TEI xmlns="http://www.tei-c.org/ns/1.0"> |
|
83 |
<teiHeader> |
|
84 |
<fileDesc> |
|
85 |
<titleStmt> |
|
86 |
<title>Title</title> |
|
87 |
</titleStmt> |
|
88 |
<publicationStmt> |
|
89 |
<p>Publication Information</p> |
|
90 |
</publicationStmt> |
|
91 |
<sourceDesc> |
|
92 |
<p>Ce document permet l'annotation sémantique de tous les textes concernant l'association avec des unités de connaissance</p> |
|
93 |
</sourceDesc> |
|
94 |
</fileDesc> |
|
95 |
</teiHeader> |
|
96 |
<text> |
|
97 |
<body> |
|
98 |
<div> |
|
99 |
<div> |
|
100 |
<!-- La date dans le header indique la date d'annotation --> |
|
101 |
<head> |
|
102 |
<date type="annotation_date" when="2016-06-16"/> |
|
103 |
</head> |
|
104 |
<span type="identification d'entités nommées" ana="CoAc56389" |
|
105 |
target="#w_article_baip_1254-0714_1850_num_01_005_974_tei_2152 |
|
106 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2153 |
|
107 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2154 |
|
108 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2155 |
|
109 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2156" /> |
|
110 |
</div> |
|
111 |
</div> |
|
112 |
</body> |
|
113 |
</text> |
|
114 |
</TEI>*/ |
|
115 |
|
|
116 |
protected void processStartElement() throws XMLStreamException, IOException { |
|
117 |
//<coac author="gazelledess" ref="CoAc397" start="5" end="5"> |
|
118 |
|
|
119 |
/*if(localname.startsWith("actr")){ |
|
120 |
System.out.println("Check existence of actr in Corpus !!!! "+localname); |
|
121 |
}*/ |
|
122 |
boolean foundAnnot = false; |
|
123 |
|
|
124 |
|
|
125 |
if (types.contains(localname) && parser.getPrefix().equals("txm")) { // don't write txm annotation elements |
|
126 |
//System.out.println(" START "+ localname); |
|
127 |
foundAnnot = true; |
|
128 |
currentType = localname; |
|
129 |
//<txm:actr author="gazelledess" ref="PhileasFogg" date="2016-09-05" start="56" end="57"> |
|
130 |
currentAuthor = parser.getAttributeValue(null, "author"); |
|
131 |
currentRef = parser.getAttributeValue(null, "ref"); |
|
132 |
currentStartPos = parser.getAttributeValue(null, "start"); |
|
133 |
currentEndPos = parser.getAttributeValue(null, "end"); |
|
134 |
currentDate = parser.getAttributeValue(null, "date"); |
|
135 |
//annotation is here |
|
136 |
startAnnotation = true; |
|
137 |
positions = new ArrayList<String>(); |
|
138 |
annotationsPositions.put(currentType, positions); |
|
139 |
|
|
140 |
// initialize allannotations |
|
141 |
if (!allannotations.containsKey(currentAuthor)) { |
|
142 |
allannotations.put(currentAuthor, new HashMap<String, ArrayList<String[]>>()); |
|
143 |
allannotationspositions.put(currentAuthor, new HashMap<String, ArrayList<ArrayList<String>>>()); |
|
144 |
} |
|
145 |
HashMap<String, ArrayList<String[]>> authorsAnnotation = allannotations.get(currentAuthor); |
|
146 |
HashMap<String, ArrayList<ArrayList<String>>> authorsAnnotationPositions = allannotationspositions.get(currentAuthor); |
|
147 |
if (!authorsAnnotation.containsKey(currentDate)) { |
|
148 |
authorsAnnotation.put(currentDate, new ArrayList<String[]>()); |
|
149 |
authorsAnnotationPositions.put(currentDate, new ArrayList<ArrayList<String>>()); |
|
150 |
} |
|
151 |
|
|
152 |
storeAnnotation(); |
|
153 |
} |
|
154 |
|
|
155 |
if (!foundAnnot) { |
|
156 |
super.processStartElement(); /// continue writing in file all elements, except the tags that are now in stand-off files |
|
157 |
|
|
158 |
// get words ids of the current annotations |
|
159 |
if (localname.equals("w") && startAnnotation) { |
|
160 |
String id = parser.getAttributeValue(null, "id"); |
|
161 |
for (String typeIn : annotationsPositions.keySet()) { |
|
162 |
positions = annotationsPositions.get(typeIn); |
|
163 |
positions.add(id); |
|
164 |
//System.out.println("Positions of w id="+posW+" for ["+typeIn+"] "); |
|
165 |
} |
|
166 |
} |
|
167 |
} |
|
168 |
} |
|
169 |
|
|
170 |
|
|
171 |
/**
 * Ends the current author stand-off file: closes the innermost open element,
 * ends the document, then flushes and closes the writer.
 * An XMLStreamException is only printed, not propagated.
 *
 * @param currentWriter the writer of the stand-off file to finish
 */
private void writeEndStandOffFile(XMLStreamWriter currentWriter){
	//System.out.println("writeEndStandOffFile ...");

	try {
		currentWriter.writeEndElement();
		// NOTE(review): per the StAX documentation writeEndDocument() also closes the
		// elements still open (body, text, TEI) — confirm with the implementation in use
		currentWriter.writeEndDocument();

		currentWriter.flush();
		currentWriter.close();
	} catch (XMLStreamException e) {
		e.printStackTrace();
	}

}
|
189 |
|
|
190 |
/** |
|
191 |
* Create the stand-off file for one author |
|
192 |
* @param file |
|
193 |
* @return |
|
194 |
*/ |
|
195 |
private XMLStreamWriter writeStartStandOffFile(File file){ |
|
196 |
//System.out.println("writeStartStandOffFile ..."); |
|
197 |
|
|
198 |
String ns = "http://www.tei-c.org/ns/1.0"; |
|
199 |
XMLOutputFactory output = XMLOutputFactory.newInstance(); |
|
200 |
XMLStreamWriter writer = null ; |
|
201 |
try { |
|
202 |
writer = output.createXMLStreamWriter(new FileWriter(file)); |
|
203 |
writer.writeStartDocument(); |
|
204 |
writer.setPrefix("tei", ns); |
|
205 |
writer.setDefaultNamespace(ns); |
|
206 |
|
|
207 |
writer.writeStartElement("TEI"); |
|
208 |
|
|
209 |
writer.writeStartElement("teiHeader"); |
|
210 |
writer.writeStartElement("fileDesc"); |
|
211 |
|
|
212 |
writer.writeStartElement("titleStmt"); |
|
213 |
writer.writeStartElement("title"); |
|
214 |
writer.writeCharacters(textid); |
|
215 |
writer.writeEndElement(); // title |
|
216 |
writer.writeEndElement(); // titleStmt |
|
217 |
|
|
218 |
writer.writeStartElement("publicationStmt"); |
|
219 |
writer.writeStartElement("p"); |
|
220 |
writer.writeCharacters("PUBLICATION INFO à renseigner"); |
|
221 |
writer.writeEndElement(); // p |
|
222 |
writer.writeEndElement(); // publicationStmt |
|
223 |
|
|
224 |
writer.writeStartElement("sourceDesc"); |
|
225 |
writer.writeStartElement("p"); |
|
226 |
writer.writeCharacters("Ce document permet l'annotation sémantique de tous les textes, par auteur"); |
|
227 |
writer.writeEndElement(); // p |
|
228 |
writer.writeEndElement(); // sourceDesc |
|
229 |
|
|
230 |
writer.writeEndElement(); // </fileDesc> |
|
231 |
writer.writeStartElement("encodingDesc"); |
|
232 |
writer.writeStartElement("projectDesc"); |
|
233 |
writer.writeStartElement("p"); |
|
234 |
writer.writeCharacters("Annotations created by "+currentAuthor+", for the use in Symogih XML platform"); |
|
235 |
writer.writeEndElement(); // p |
|
236 |
writer.writeEndElement(); // </projectDesc> |
|
237 |
writer.writeEndElement(); // </encodingDesc> |
|
238 |
writer.writeEndElement(); // </teiHeader> |
|
239 |
|
|
240 |
|
|
241 |
writer.writeStartElement("text"); |
|
242 |
writer.writeStartElement("body"); |
|
243 |
writer.writeCharacters("\n"); |
|
244 |
writer.writeStartElement("div"); |
|
245 |
writer.writeCharacters("\n"); |
|
246 |
} catch (XMLStreamException e) { |
|
247 |
// TODO Auto-generated catch block |
|
248 |
e.printStackTrace(); |
|
249 |
} catch (IOException e) { |
|
250 |
// TODO Auto-generated catch block |
|
251 |
e.printStackTrace(); |
|
252 |
} |
|
253 |
return writer; |
|
254 |
} |
|
255 |
|
|
256 |
private void storeAnnotation() { |
|
257 |
allannotations.get(currentAuthor).get(currentDate).add(new String[]{currentDate, currentRef, currentType}); |
|
258 |
} |
|
259 |
|
|
260 |
private void storeAnnotationPositions() { |
|
261 |
allannotationspositions.get(currentAuthor).get(currentDate).add(positions); |
|
262 |
} |
|
263 |
|
|
264 |
/** |
|
265 |
* write stand-off annotation |
|
266 |
*/ |
|
267 |
private void writeStartAnnotationToStandoffFile(String[] data, ArrayList<String> positions) { |
|
268 |
//System.out.println("writeStartAnnotationToStandoffFile ..."); |
|
269 |
try { |
|
270 |
currentWriter.writeStartElement("span"); |
|
271 |
currentWriter.writeAttribute("type","named_entities_identifications"); |
|
272 |
currentWriter.writeAttribute("ana", data[1]); |
|
273 |
StringBuffer listWids = new StringBuffer(); |
|
274 |
for (String posW : positions) { |
|
275 |
listWids.append("#"+posW+" "); |
|
276 |
} |
|
277 |
currentWriter.writeAttribute("target", listWids.toString().trim()); |
|
278 |
currentWriter.writeComment("type="+data[2]); |
|
279 |
currentWriter.writeEndElement(); // span |
|
280 |
currentWriter.writeCharacters("\n"); |
|
281 |
|
|
282 |
} catch (XMLStreamException e) { |
|
283 |
// TODO Auto-generated catch block |
|
284 |
e.printStackTrace(); |
|
285 |
} |
|
286 |
} |
|
287 |
|
|
288 |
protected void processEndElement() throws XMLStreamException { |
|
289 |
boolean foundAnnot = false; |
|
290 |
if (types.contains(localname) && parser.getPrefix().equals("txm")) { // skip annotation end element |
|
291 |
//System.out.println(" END "+ localname); |
|
292 |
foundAnnot = true; |
|
293 |
//annotation ends here |
|
294 |
storeAnnotationPositions(); |
|
295 |
|
|
296 |
if (annotationsPositions.containsKey(localname)) { |
|
297 |
annotationsPositions.remove(localname); |
|
298 |
} |
|
299 |
|
|
300 |
startAnnotation = false; |
|
301 |
} |
|
302 |
|
|
303 |
if (!foundAnnot) { |
|
304 |
if (localname.equals("TEI")) { |
|
305 |
try { |
|
306 |
for (String author : allannotations.keySet()) { |
|
307 |
System.out.println(" author="+author); |
|
308 |
File currentXmlFile = new File(xmlStandOffDirectory, filename.substring(0, filename.length()-4)+"_"+currentAuthor+"_annotations.xml"); |
|
309 |
|
|
310 |
// write start of the stand-off file |
|
311 |
currentWriter = writeStartStandOffFile(currentXmlFile); |
|
312 |
|
|
313 |
// write annotations grouped per date |
|
314 |
HashMap<String, ArrayList<String[]>> dates = allannotations.get(author); |
|
315 |
HashMap<String, ArrayList<ArrayList<String>>> datesPositions = allannotationspositions.get(author); |
|
316 |
for (String date : datesPositions.keySet()) { |
|
317 |
ArrayList<String[]> datas = dates.get(date); |
|
318 |
ArrayList<ArrayList<String>> positions = datesPositions.get(date); |
|
319 |
|
|
320 |
currentWriter.writeStartElement("div"); |
|
321 |
currentWriter.writeCharacters("\n"); |
|
322 |
currentWriter.writeStartElement("head"); |
|
323 |
currentWriter.writeStartElement("date"); |
|
324 |
currentWriter.writeAttribute("type","annotation_date"); |
|
325 |
currentWriter.writeAttribute("when", date); |
|
326 |
currentWriter.writeEndElement(); // date |
|
327 |
currentWriter.writeEndElement(); // head |
|
328 |
currentWriter.writeCharacters("\n"); |
|
329 |
|
|
330 |
for (int i = 0 ; i < datas.size() ; i++) { |
|
331 |
writeStartAnnotationToStandoffFile(datas.get(i), positions.get(i)); |
|
332 |
} |
|
333 |
|
|
334 |
currentWriter.writeEndElement(); // div |
|
335 |
currentWriter.writeCharacters("\n"); |
|
336 |
} |
|
337 |
|
|
338 |
// write the end of the stand-off file |
|
339 |
writeEndStandOffFile(currentWriter); |
|
340 |
} |
|
341 |
} |
|
342 |
catch(Exception e){ |
|
343 |
System.out.println("Current writer "+currentWriter); |
|
344 |
} |
|
345 |
} |
|
346 |
super.processEndElement(); |
|
347 |
} |
|
348 |
} |
|
349 |
|
|
350 |
public static void main(String strs[]){ |
|
351 |
try { |
|
352 |
File xmlFile = new File("/home/mdecorde/TXM/corpora/TDM80J/txm/TDM80J", "tdm80j.xml"); |
|
353 |
File outfile = new File(xmlFile.getParentFile(), "tdm80j-tei.xml"); |
|
354 |
|
|
355 |
File xmlStandOffDirectory = xmlFile.getParentFile(); |
|
356 |
List<AnnotationType> types = new ArrayList<AnnotationType>(); |
|
357 |
types.add(new AnnotationType("local","actr_id", "actr")); |
|
358 |
AnnotationSyMoGIHWriter annotStdff; |
|
359 |
try { |
|
360 |
annotStdff = new AnnotationSyMoGIHWriter("TDBM80J", xmlFile, xmlStandOffDirectory, types, true); |
|
361 |
annotStdff.process(outfile); |
|
362 |
} catch (MalformedURLException e) { |
|
363 |
// TODO Auto-generated catch block |
|
364 |
e.printStackTrace(); |
|
365 |
} |
|
366 |
} catch(Exception e) { |
|
367 |
e.printStackTrace(); |
|
368 |
} |
|
369 |
} |
|
370 |
} |
|
0 | 371 |
tmp/org.txm.annotation.core/src/org/txm/annotation/AnnotationWriter.java (revision 344) | ||
---|---|---|
1 |
package org.txm.annotation; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.FileInputStream; |
|
5 |
import java.io.FileOutputStream; |
|
6 |
import java.io.IOException; |
|
7 |
import java.io.InputStream; |
|
8 |
import java.io.OutputStream; |
|
9 |
import java.util.ArrayList; |
|
10 |
import java.util.HashMap; |
|
11 |
import java.util.List; |
|
12 |
import java.util.logging.Level; |
|
13 |
|
|
14 |
import javax.xml.stream.XMLStreamException; |
|
15 |
|
|
16 |
import org.apache.commons.lang.StringUtils; |
|
17 |
import org.txm.Toolbox; |
|
18 |
import org.txm.annotation.repository.AnnotationEffect; |
|
19 |
import org.txm.annotation.repository.AnnotationType; |
|
20 |
import org.txm.annotation.repository.KnowledgeRepository; |
|
21 |
import org.txm.core.preferences.TBXPreferences; |
|
22 |
import org.txm.importer.ValidateXml; |
|
23 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
24 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
|
25 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
26 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
27 |
import org.txm.stat.utils.ConsoleProgressBar; |
|
28 |
import org.txm.utils.DeleteDir; |
|
29 |
import org.txm.utils.logger.Log; |
|
30 |
import org.txm.utils.zip.Zip; |
|
31 |
import org.w3c.dom.Document; |
|
32 |
import org.w3c.dom.Node; |
|
33 |
|
|
34 |
|
|
35 |
public class AnnotationWriter { |
|
36 |
|
|
37 |
/** Corpus whose annotations are written. */
MainCorpus corpus;
/** All annotation types of {@link #defaultKR}, read in the constructor. */
private List<AnnotationType> types;
/** The first knowledge repository listed by the corpus, read in the constructor. */
private KnowledgeRepository defaultKR;
|
40 |
|
|
41 |
/**
 * Creates a writer for the annotations of a corpus, using the first knowledge
 * repository listed by the corpus and all of its annotation types.
 *
 * @param corpus the corpus to work on
 */
public AnnotationWriter(MainCorpus corpus){
	this.corpus = corpus;
	this.defaultKR = corpus.getKnowledgeRepository(corpus.getKnowledgeRepositoryNames().get(0));
	this.types = this.defaultKR.getAllAnnotationTypes();
}
|
47 |
|
|
48 |
/** |
|
49 |
* process a text to build standoff files |
|
50 |
* |
|
51 |
* @param textid |
|
52 |
* @param currentXMLFile |
|
53 |
* @param currentXMLStandoffFile |
|
54 |
* @param xmlStandOffDirectory |
|
55 |
* @return |
|
56 |
* @throws IOException |
|
57 |
* @throws CqiServerError |
|
58 |
* @throws CqiClientException |
|
59 |
* @throws InvalidCqpIdException |
|
60 |
* @throws XMLStreamException |
|
61 |
*/ |
|
62 |
protected boolean writeTextAnnotationToSyMoGIH(String textid, File currentXMLFile, File currentXMLStandoffFile, File xmlStandOffDirectory) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{ |
|
63 |
System.out.println(" text="+textid); |
|
64 |
boolean show_debug = Log.getLevel().intValue() < Level.WARNING.intValue(); |
|
65 |
|
|
66 |
AnnotationSyMoGIHWriter annotationstdoff = new AnnotationSyMoGIHWriter(textid, currentXMLFile, xmlStandOffDirectory, types, show_debug); |
|
67 |
|
|
68 |
///rather test on the new xml standoff files |
|
69 |
if (annotationstdoff.process(currentXMLStandoffFile)) { |
|
70 |
if (ValidateXml.test(currentXMLStandoffFile)) { //TODO ALSO check if annotations are well-written |
|
71 |
return true; |
|
72 |
} else { |
|
73 |
System.out.println("Error: result file "+currentXMLStandoffFile+" is malformed."); |
|
74 |
} |
|
75 |
} else { |
|
76 |
System.out.println("Error: while processing "+currentXMLStandoffFile+" in standoff dir"); |
|
77 |
} |
|
78 |
return false; |
|
79 |
|
|
80 |
} |
|
81 |
|
|
82 |
/** |
|
83 |
* Writing annotations in standoff files for each text of the corpus |
|
84 |
* |
|
85 |
* @param allCorpusAnnotations |
|
86 |
* @return |
|
87 |
* @throws IOException |
|
88 |
* @throws CqiServerError |
|
89 |
* @throws CqiClientException |
|
90 |
* @throws InvalidCqpIdException |
|
91 |
* @throws XMLStreamException |
|
92 |
*/ |
|
93 |
public boolean writeAnnotationsInStandoff(File resultZipFile) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{ |
|
94 |
|
|
95 |
List<String> textsIds = corpus.getTextsID(); |
|
96 |
System.out.println("Exporting annotations of "+StringUtils.join(textsIds, ", ")+"."); |
|
97 |
|
|
98 |
File resultDirectory = new File(Toolbox.getParam(TBXPreferences.USER_TXM_HOME), "results/"+corpus.getName()+"_annotations"); |
|
99 |
DeleteDir.deleteDirectory(resultDirectory); |
|
100 |
resultDirectory.mkdirs(); |
|
101 |
if (!(resultDirectory.exists() && resultDirectory.canWrite())) { |
|
102 |
System.out.println("ERROR: could not create/write temporary directory: "+resultDirectory); |
|
103 |
return false; |
|
104 |
} |
|
105 |
|
|
106 |
File inputDirectory = corpus.getBaseDirectory(); |
|
107 |
File corpusTxmDirectory = new File(inputDirectory, "txm/"+corpus.getName()); |
|
108 |
|
|
109 |
System.out.println("Writing annotations XML files in "+resultDirectory); |
|
110 |
for (String textid : textsIds) { |
|
111 |
File currentXMLFile = new File(corpusTxmDirectory, textid+".xml"); |
|
112 |
if (!currentXMLFile.exists()) { |
|
113 |
System.out.println("Error: cannot found XML file for text with id="+textid); |
|
114 |
return false; |
|
115 |
} |
|
116 |
File currentXMLStandoffFile = new File(resultDirectory, textid+".xml"); //To Be Changed ? |
|
117 |
if (!writeTextAnnotationToSyMoGIH(textid, currentXMLFile, currentXMLStandoffFile, resultDirectory )) { |
|
118 |
System.out.println("Error while writing annotations of text "+currentXMLStandoffFile); |
|
119 |
return false; |
|
120 |
} |
|
121 |
} |
|
122 |
|
|
123 |
Zip.compress(resultDirectory, resultZipFile, new ConsoleProgressBar(1)); |
|
124 |
DeleteDir.deleteDirectory(resultDirectory); |
|
125 |
|
|
126 |
System.out.println("Annotations saved in "+resultZipFile.getAbsolutePath()); |
|
127 |
return resultZipFile.exists(); |
|
128 |
} |
|
129 |
|
|
130 |
/** |
|
131 |
* |
|
132 |
* @param allCorpusAnnotations ordered annotations |
|
133 |
* @return |
|
134 |
* @throws IOException |
|
135 |
* @throws CqiServerError |
|
136 |
* @throws CqiClientException |
|
137 |
* @throws InvalidCqpIdException |
|
138 |
* @throws XMLStreamException |
|
139 |
*/ |
|
140 |
public boolean writeAnnotations(List<Annotation> allCorpusAnnotations) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{ |
|
141 |
//MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(c.getName()); |
|
142 |
System.out.println("Saving "+allCorpusAnnotations.size()+"annotations..."); |
|
143 |
|
|
144 |
int[] end_limits = corpus.getTextEndLimits(); |
|
145 |
int[] start_limits = corpus.getTextStartLimits(); |
|
146 |
List<String> textsIds = corpus.getTextsID(); |
|
147 |
|
|
148 |
File inputDirectory = corpus.getBaseDirectory(); |
|
149 |
File txmDirectory = new File(inputDirectory, "txm/"+corpus.getName()); |
|
150 |
|
|
151 |
ArrayList<Annotation> textAnnotations = new ArrayList<Annotation>(); |
|
152 |
HashMap<String, ArrayList<Annotation>> annotationsPerTexts = new HashMap<String, ArrayList<Annotation>>(); |
|
153 |
|
|
154 |
int currentText = 0; |
|
155 |
File currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml"); |
|
156 |
|
|
157 |
//store first text |
|
158 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml"); |
|
159 |
textAnnotations = new ArrayList<Annotation>(); |
|
160 |
annotationsPerTexts.put(textsIds.get(currentText), textAnnotations); |
|
161 |
|
|
162 |
// group annotations per text |
|
163 |
for (Annotation currentAnnot : allCorpusAnnotations) { // parse all annotations |
|
164 |
//System.out.println(" Annotation: "+currentAnnot); |
|
165 |
int pos = currentAnnot.getPK().getEndPosition(); |
|
166 |
|
|
167 |
while (pos > end_limits[currentText]) { // while pos is not in the currentText.end |
|
168 |
currentText++; |
|
169 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml"); |
|
170 |
textAnnotations = new ArrayList<Annotation>(); |
|
171 |
annotationsPerTexts.put(textsIds.get(currentText), textAnnotations); |
|
172 |
} |
|
173 |
|
|
174 |
textAnnotations.add(currentAnnot); |
|
175 |
} |
|
176 |
|
|
177 |
File tmpXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_tmp"); |
|
178 |
DeleteDir.deleteDirectory(tmpXMLTXMDirectory); |
|
179 |
tmpXMLTXMDirectory.mkdirs(); |
|
180 |
if (!(tmpXMLTXMDirectory.exists() && tmpXMLTXMDirectory.canWrite())) { |
|
181 |
System.out.println("ERROR: could not create directory: "+tmpXMLTXMDirectory); |
|
182 |
return false; |
|
183 |
} |
|
184 |
|
|
185 |
File previousXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_previous"); |
|
186 |
//DeleteDir.deleteDirectory(tmpXMLTXMDirectory); |
|
187 |
previousXMLTXMDirectory.mkdirs(); |
|
188 |
if (!(previousXMLTXMDirectory.exists() && previousXMLTXMDirectory.canWrite())) { |
|
189 |
System.out.println("ERROR: could not create directory: "+previousXMLTXMDirectory); |
|
190 |
return false; |
|
191 |
} |
|
192 |
|
|
193 |
System.out.println("Annotations grouped per text for "+annotationsPerTexts.size()+" text"+(annotationsPerTexts.size() > 0?"s":"")); |
|
194 |
System.out.println(" - Writing temporary XML files in: "+tmpXMLTXMDirectory); |
|
195 |
System.out.println(" - Copying previous version of XML files in: "+previousXMLTXMDirectory); |
|
196 |
// for all annotation PER TEXT, update the XML-TXM files |
|
197 |
currentText = 0; |
|
198 |
while (currentText < end_limits.length ) { // end limits : 10, 30, 45, 55, 103 |
|
199 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml"); |
|
200 |
ArrayList<Annotation> allAnnotations = annotationsPerTexts.get(textsIds.get(currentText)); |
|
201 |
if (allAnnotations != null && allAnnotations.size() > 0) { |
|
202 |
ArrayList<Annotation> allSegmentAnnotations = new ArrayList<Annotation>(); |
|
203 |
ArrayList<Annotation> allTokenAnnotations = new ArrayList<Annotation>(); |
|
204 |
|
|
205 |
KnowledgeRepository defaultKR = corpus.getKnowledgeRepository(corpus.getKnowledgeRepositoryNames().get(0)); |
|
206 |
System.out.println("Using KR="+defaultKR); |
|
207 |
for (Annotation a : allAnnotations) { |
|
208 |
AnnotationType type = defaultKR.getType(a.getType()); |
|
209 |
if (type != null) { |
|
210 |
if (AnnotationEffect.SEGMENT.equals(type.getEffect())) { |
|
211 |
allSegmentAnnotations.add(a); |
|
212 |
} else if (AnnotationEffect.TOKEN.equals(type.getEffect())) { |
|
213 |
allTokenAnnotations.add(a); |
|
214 |
} else { |
|
215 |
System.out.println("Annotation "+a+" with type="+a.getType()+" not found in default KR="+defaultKR); |
|
216 |
} |
|
217 |
} else { |
|
218 |
System.out.println("Warning: unknowed type: "+a.getType()); |
|
219 |
} |
|
220 |
} |
|
221 |
|
|
222 |
if (!writeAnnotationsInFile(currentXMLFile, start_limits[currentText], |
|
223 |
allSegmentAnnotations, allTokenAnnotations, |
|
224 |
tmpXMLTXMDirectory, previousXMLTXMDirectory)) { |
|
225 |
System.out.println("Error while writing annotations of text "+currentXMLFile); |
|
226 |
return false; |
|
227 |
} |
|
228 |
} |
|
229 |
currentText++; |
|
230 |
} |
|
231 |
|
|
232 |
return true; |
|
233 |
} |
|
234 |
|
|
235 |
//writeAnnotationInStandoffFile(currentXMLStandoffFile, allAnnotations, annotator, tmpXMLTXMDirectory, previousXMLTXMDirectory)) |
|
236 |
|
|
237 |
protected boolean writeAnnotationsInFile(File xmlFile, int text_start_position, |
|
238 |
ArrayList<Annotation> segmentAnnotations, ArrayList<Annotation> tokenAnnotations, File tmpXMLTXMDirectory, File previousXMLTXMDirectory) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException, XMLStreamException{ |
|
239 |
System.out.println("Writing annotations for text "+xmlFile+" segment annotations="+segmentAnnotations.size()+" token annotations="+tokenAnnotations.size()); |
|
240 |
System.out.println(segmentAnnotations); |
|
241 |
System.out.println(tokenAnnotations); |
|
242 |
|
|
243 |
boolean show_debug = Log.getLevel().intValue() < Level.WARNING.intValue(); |
|
244 |
AnnotationInjector annotationInjector = new AnnotationInjector(xmlFile, segmentAnnotations, tokenAnnotations, text_start_position, show_debug); |
|
245 |
|
|
246 |
File tmpfile = new File(tmpXMLTXMDirectory, xmlFile.getName()); |
|
247 |
File previousfile = new File(previousXMLTXMDirectory, xmlFile.getName()); |
|
248 |
|
|
249 |
if (annotationInjector.process(tmpfile)) { |
|
250 |
if (ValidateXml.test(tmpfile)) { //TODO ALSO check if annotations are well-written |
|
251 |
previousfile.delete(); // in case there is one |
|
252 |
if (!previousfile.exists() && xmlFile.renameTo(previousfile)) { |
|
253 |
tmpfile.renameTo(xmlFile); |
|
254 |
return true; |
|
255 |
} else { |
|
256 |
System.out.println("Error: could not replace XML-TXM file: "+xmlFile+" with "+tmpfile); |
|
257 |
} |
|
258 |
} else { |
|
259 |
System.out.println("Error: result file "+tmpfile+" is malformed."); |
|
260 |
} |
|
261 |
} else { |
|
262 |
System.out.println("Error: while processing "+xmlFile+" in temp dir"); |
|
263 |
} |
|
264 |
return false; |
|
265 |
} |
|
266 |
} |
|
0 | 267 |
tmp/org.txm.annotation.core/src/org/txm/annotation/AnnotationInjector.java (revision 344) | ||
---|---|---|
1 |
package org.txm.annotation; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.IOException; |
|
5 |
import java.net.MalformedURLException; |
|
6 |
import java.util.ArrayList; |
|
7 |
import java.util.Collections; |
|
8 |
import java.util.Comparator; |
|
9 |
import java.util.Date; |
|
10 |
import java.util.HashMap; |
|
11 |
import java.util.List; |
|
12 |
|
|
13 |
import javax.xml.stream.XMLInputFactory; |
|
14 |
import javax.xml.stream.XMLStreamException; |
|
15 |
|
|
16 |
import org.txm.importer.StaxIdentityParser; |
|
17 |
import org.txm.importer.ValidateXml; |
|
18 |
import org.txm.objects.BaseParameters; |
|
19 |
|
|
20 |
/** |
|
21 |
* The Class AnnotationInjection. |
|
22 |
* |
|
23 |
* @author mdecorde |
|
24 |
* |
|
25 |
* inject annotation from a stand-off file into a xml-tei-txm file "id" |
|
26 |
*/ |
|
27 |
|
|
28 |
public class AnnotationInjector extends StaxIdentityParser { |
|
29 |
|
|
30 |
File xmlFile; |
|
31 |
HashMap<Integer, List<Annotation>> annotationsToAddByStartPos; // contains |
|
32 |
// annotation |
|
33 |
// to write |
|
34 |
HashMap<Integer, List<Annotation>> annotationsToAddByEndPos; |
|
35 |
List<Annotation> currentStartAnnotations; |
|
36 |
List<Annotation> currentEndAnnotations; |
|
37 |
HashMap<String, Annotation> currentTokenAnnotations = new HashMap<String, Annotation>(); |
|
38 |
|
|
39 |
int n = 0; |
|
40 |
boolean debug = false; |
|
41 |
|
|
42 |
String data; // the word id properties to add/replace |
|
43 |
String newform = null; |
|
44 |
String wordId; |
|
45 |
boolean inW = false; |
|
46 |
String anaType; |
|
47 |
|
|
48 |
int position_counter = 0; |
|
49 |
|
|
50 |
String closeNext = null; // the next close tag to delete |
|
51 |
ArrayList<String> openedElements = new ArrayList<String>(); // to ensure to |
|
52 |
// delete the |
|
53 |
// right element |
|
54 |
|
|
55 |
private List<Annotation> previousEndAnnotations; |
|
56 |
private ArrayList<Annotation> tokenAnnotations; |
|
57 |
|
|
58 |
String user; |
|
59 |
|
|
60 |
/**
 * Creates an injector that copies {@code xmlFile} while adding, updating or
 * deleting annotations.
 *
 * <p>Segment annotations are indexed twice, by start position and by end
 * position. Within one start position they are ordered by decreasing end, and
 * within one end position by decreasing start (the original author's note:
 * "write the smaller in the bigger").
 *
 * @param xmlFile the XML file to read
 * @param segmentAnnotations annotations written as elements around words;
 *            {@code null} is treated as an empty list
 * @param tokenAnnotations annotations written as {@code ana} children of
 *            {@code w} elements. This list is sorted in place by start
 *            position and entries are removed from it while the file is
 *            processed; {@code null} is treated as an empty list
 * @param corpus_start_position initial value of the word position counter
 * @param debug when {@code true}, progress messages are printed to stdout
 * @throws IOException propagated from the parent constructor
 * @throws XMLStreamException propagated from the parent constructor
 */
public AnnotationInjector(File xmlFile, List<Annotation> segmentAnnotations, ArrayList<Annotation> tokenAnnotations, int corpus_start_position, boolean debug) throws IOException, XMLStreamException {
    super(xmlFile.toURI().toURL()); // init reader and writer

    this.user = System.getProperty("user.name");

    this.debug = debug;
    this.xmlFile = xmlFile;
    this.n = 0;
    this.position_counter = corpus_start_position;
    factory = XMLInputFactory.newInstance();

    // preparing annotations to being written in the right inclusion order
    annotationsToAddByStartPos = new HashMap<Integer, List<Annotation>>();
    annotationsToAddByEndPos = new HashMap<Integer, List<Annotation>>();

    if (segmentAnnotations != null) {
        for (Annotation a : segmentAnnotations) {
            List<Annotation> sameStart = annotationsToAddByStartPos.get(a.getStart());
            if (sameStart == null) {
                sameStart = new ArrayList<Annotation>();
                annotationsToAddByStartPos.put(a.getStart(), sameStart);
            }
            List<Annotation> sameEnd = annotationsToAddByEndPos.get(a.getEnd());
            if (sameEnd == null) {
                sameEnd = new ArrayList<Annotation>();
                annotationsToAddByEndPos.put(a.getEnd(), sameEnd);
            }
            sameStart.add(a);
            sameEnd.add(a);
        }
    }

    // Same start position: the annotation with the greater end comes first.
    // Explicit comparison instead of subtraction, which can overflow.
    Comparator<Annotation> byEndDescending = new Comparator<Annotation>() {
        @Override
        public int compare(Annotation arg0, Annotation arg1) {
            int end0 = arg0.getEnd();
            int end1 = arg1.getEnd();
            return end1 < end0 ? -1 : (end1 == end0 ? 0 : 1);
        }
    };
    for (List<Annotation> sameStart : annotationsToAddByStartPos.values()) {
        Collections.sort(sameStart, byEndDescending);
    }

    // Same end position: the annotation with the greater start comes first.
    Comparator<Annotation> byStartDescending = new Comparator<Annotation>() {
        @Override
        public int compare(Annotation arg0, Annotation arg1) {
            int start0 = arg0.getStart();
            int start1 = arg1.getStart();
            return start1 < start0 ? -1 : (start1 == start0 ? 0 : 1);
        }
    };
    for (List<Annotation> sameEnd : annotationsToAddByEndPos.values()) {
        Collections.sort(sameEnd, byStartDescending);
    }

    // sorting token annotations by increasing start position
    this.tokenAnnotations = (tokenAnnotations != null) ? tokenAnnotations : new ArrayList<Annotation>();
    Collections.sort(this.tokenAnnotations, new Comparator<Annotation>() {
        @Override
        public int compare(Annotation arg0, Annotation arg1) {
            int start0 = arg0.getStart();
            int start1 = arg1.getStart();
            return start0 < start1 ? -1 : (start0 == start1 ? 0 : 1);
        }
    });

    if (debug) {
        System.out.println("annotations for " + xmlFile);
        System.out.println(" segment grouped and ordered by start position: " + annotationsToAddByStartPos);
        System.out.println(" segment grouped and ordered by end position: " + annotationsToAddByEndPos);
        System.out.println(" token ordered by start position: " + this.tokenAnnotations);
    }
}
|
140 |
|
|
141 |
boolean mustChangeAnaValue = false; |
|
142 |
boolean inAna = false; |
|
143 |
|
|
144 |
protected void processStartElement() throws XMLStreamException, IOException { |
|
145 |
|
|
146 |
currentStartAnnotations = annotationsToAddByStartPos.get(position_counter); |
|
147 |
currentEndAnnotations = annotationsToAddByEndPos.get(position_counter); |
|
148 |
//System.out.println("A Starts: "+currentStartAnnotations+" "+currentEndAnnotations); |
|
149 |
// if (debug) |
|
150 |
// System.out.println("----- "+localname+"@"+position_counter+" START ANNOT = "+currentStartAnnotations); |
|
151 |
if ("teiHeader".equals(localname)) { |
|
152 |
super.processStartElement(); |
|
153 |
goToEnd("teiHeader"); // skip teiHeader // will be closed in |
|
154 |
// processEndElement() |
|
155 |
} else if ("w".equals(localname)) { |
|
156 |
// System.out.println("W START pos="+position_counter+" annots="+currentStartAnnotations); |
|
157 |
if (currentStartAnnotations != null) {// there are still annotations |
|
158 |
// to write, possible |
|
159 |
// several for a position |
|
160 |
// if (debug) |
|
161 |
// System.out.println("WRITING START at "+position_counter+": "+currentStartAnnotations); |
|
162 |
for (Annotation a : currentStartAnnotations) { |
|
163 |
if (!"#del".equals(a.getValue())) { |
|
164 |
if (debug) |
|
165 |
System.out.println(" force write start annotation " + a); |
|
166 |
writeStartAnnotation(a); |
|
167 |
} else { |
|
168 |
if (debug) |
|
169 |
System.out.println(" no need to write start annotation " + a); |
|
170 |
currentEndAnnotations = annotationsToAddByEndPos.get(a.getEnd()); |
|
171 |
// System.out.println(" shall we need to change annotation attribute : "+a.getValue()+ |
|
172 |
// " | currentEndAnnotations : "+currentEndAnnotations); |
|
173 |
if (currentEndAnnotations == null) { |
|
174 |
System.out.println("WARNING ERROR null pointer for end position of annotation " + a); |
|
175 |
} else { |
|
176 |
currentEndAnnotations.remove(a); |
|
177 |
} |
|
178 |
} |
|
179 |
} |
|
180 |
} |
|
181 |
inW = true; |
|
182 |
|
|
183 |
// get token annotation if any |
|
184 |
currentTokenAnnotations.clear(); // current word annotations to write, may be empty |
|
185 |
Annotation a = null; |
|
186 |
|
|
187 |
if (tokenAnnotations.size() > 0) { |
|
188 |
a = tokenAnnotations.get(0); |
|
189 |
//System.out.println("for p="+position_counter+" next token annot="+a); |
|
190 |
|
|
191 |
if (a.getStart() <= position_counter && position_counter <= a.getEnd()) { |
|
192 |
// write the annotation |
|
193 |
currentTokenAnnotations.put(a.getType(), a); |
|
194 |
if (a.getEnd() == position_counter) // its the last word that needs to write this annotation |
|
195 |
tokenAnnotations.remove(0); |
|
196 |
} |
|
197 |
} |
|
198 |
|
|
199 |
super.processStartElement(); // write the tag |
|
200 |
} else if ("ana".equals(localname)) { |
|
201 |
inAna = true; |
|
202 |
String type = parser.getAttributeValue(null, "type").substring(1); |
|
203 |
if (currentTokenAnnotations.containsKey(type)) { |
|
204 |
Annotation a = currentTokenAnnotations.get(type); |
|
205 |
//System.out.println("Updating token annotation with: " + a); |
|
206 |
|
|
207 |
String value = a.getValue(); |
|
208 |
if ("#del".equals(value)) |
|
209 |
value = ""; |
|
210 |
if (value == null) |
|
211 |
value = "ERROR"; |
|
212 |
|
|
213 |
try { |
|
214 |
writer.writeStartElement("ana"); |
|
215 |
writer.writeAttribute("type", "#" + type); |
|
216 |
writer.writeAttribute("resp", "#" + a.getAnnotator()); // change |
|
217 |
// resp |
|
218 |
writer.writeCharacters(value); |
|
219 |
mustChangeAnaValue = true; |
|
220 |
} catch (XMLStreamException e) { |
|
221 |
// TODO Auto-generated catch block |
|
222 |
e.printStackTrace(); |
|
223 |
} |
|
224 |
|
|
225 |
currentTokenAnnotations.remove(type); // annotation updated |
|
226 |
} else { |
|
227 |
super.processStartElement(); |
|
228 |
} |
|
229 |
} else if (currentStartAnnotations != null) { // is it an annotations ? |
|
230 |
// System.out.println("A START="+localname); |
|
231 |
boolean toDelete = false; |
|
232 |
for (int i = 0; i < currentStartAnnotations.size(); i++) { |
|
233 |
Annotation a = currentStartAnnotations.get(i); |
|
234 |
// if (debug) |
|
235 |
// System.out.println("=============== Start annot : "+a); |
|
236 |
if (a.getType().equals(localname)) { // force delete, will be |
|
237 |
// written in the "w" |
|
238 |
// case |
|
239 |
int start = Integer.parseInt(parser.getAttributeValue(null, "start")); |
|
240 |
int end = Integer.parseInt(parser.getAttributeValue(null, "end")); |
|
241 |
if (a.getStart() == start && a.getEnd() == end) { |
|
242 |
// updated = true; |
|
243 |
// if (debug) |
|
244 |
// System.out.println(" found existing annotation "+a); |
|
245 |
if ("#del".equals(a.getValue())) { |
|
246 |
toDelete = true; |
|
247 |
// currentEndAnnotations.remove(a); // MAYBE NOT TO |
|
248 |
// DO THIS HERE ? |
|
249 |
if (debug) |
|
250 |
System.out.println(" force delete start annotation " + a); |
|
251 |
} else { // update existing annotation, no need to store |
|
252 |
// the end of annotation |
|
253 |
writeStartAnnotation(a); |
|
254 |
toDelete = true; |
|
255 |
if (debug) |
|
256 |
System.out.println(" update annotation " + a); |
|
257 |
currentEndAnnotations = annotationsToAddByEndPos.get(a.getEnd());// EL |
|
258 |
// NO |
|
259 |
// CAPITO |
|
260 |
// YET |
|
261 |
// !! |
|
262 |
// System.out.println(" shall we need to change annotation attribute : "+a.getValue()+ |
|
263 |
// " | currentEndAnnotations : "+currentEndAnnotations); |
|
264 |
if (currentEndAnnotations == null) { |
|
265 |
System.out.println("WARNING ERROR null pointer for end position of annotation " + a); |
|
266 |
} else { |
|
267 |
currentEndAnnotations.remove(a); |
|
268 |
} |
|
269 |
} |
|
270 |
|
|
271 |
currentStartAnnotations.remove(i); |
|
272 |
i--; |
|
273 |
} |
|
274 |
|
|
275 |
break; // no need to continue |
|
276 |
} else { |
|
277 |
// System.out.println("------- with same start pos"); |
|
278 |
String s_end = parser.getAttributeValue(null, "end"); |
|
279 |
String s_start = parser.getAttributeValue(null, "start"); |
|
280 |
String s_author = parser.getAttributeValue(null, "author"); |
|
281 |
String s_ref = parser.getAttributeValue(null, "ref"); |
|
282 |
if (s_start != null && s_end != null && s_author != null && s_ref != null) { |
|
283 |
int end = Integer.parseInt(s_end); |
|
284 |
if (a.getEnd() >= end) { // must write a |
|
285 |
if (!"#del".equals(a.getValue())) { |
|
286 |
if (debug) |
|
287 |
System.out.println(" writing of start annotation " + a); |
|
288 |
writeStartAnnotation(a); |
|
289 |
|
|
290 |
} else { |
|
291 |
toDelete = true; |
|
292 |
if (debug) |
|
293 |
System.out.println(" no writing of start annotation " + a); |
|
294 |
currentEndAnnotations = annotationsToAddByEndPos.get(a.getEnd()); |
|
295 |
// System.out.println(" shall we need to change annotation attribute : "+a.getValue()+ |
|
296 |
// " | currentEndAnnotations : "+currentEndAnnotations); |
|
297 |
if (currentEndAnnotations == null) { |
|
298 |
System.out.println("WARNING ERROR null pointer for end position of annotation " + a); |
|
299 |
} else { |
|
300 |
currentEndAnnotations.remove(a); |
|
301 |
} |
|
302 |
} |
|
303 |
currentStartAnnotations.remove(i); |
|
304 |
i--; |
|
305 |
// break; |
|
306 |
} |
|
307 |
} |
|
308 |
} |
|
309 |
} |
|
310 |
if (!toDelete) { // the structure or annotation was not modified |
|
311 |
super.processStartElement(); |
|
312 |
} |
|
313 |
|
|
314 |
} else { |
|
315 |
// System.out.println("X START="+localname); |
|
316 |
super.processStartElement(); |
|
317 |
} |
|
318 |
} |
|
319 |
|
|
320 |
@Override |
|
321 |
public void processCharacters() throws XMLStreamException { |
|
322 |
//System.out.println("processCharaters inAna="+inAna+" mustChangeAnaValue="+mustChangeAnaValue); |
|
323 |
if (inAna && mustChangeAnaValue) { |
|
324 |
// nothing content is already written |
|
325 |
//System.out.println("skip ana value because we replace it"); |
|
326 |
} else { |
|
327 |
super.processCharacters(); |
|
328 |
} |
|
329 |
} |
|
330 |
|
|
331 |
protected void processEndElement() throws XMLStreamException { |
|
332 |
// currentEndAnnotations = |
|
333 |
// annotationsToAddByEndPos.get(position_counter); // annotation to end |
|
334 |
previousEndAnnotations = annotationsToAddByEndPos.get(position_counter - 1); // existing |
|
335 |
// (or |
|
336 |
// not) |
|
337 |
// annotation |
|
338 |
// that |
|
339 |
// have |
|
340 |
// already |
|
341 |
// been |
|
342 |
// closed |
|
343 |
// if (debug) |
|
344 |
// System.out.println("----- "+localname+"@"+position_counter+" END ANNOT = "+currentEndAnnotations); |
|
345 |
|
|
346 |
if ("w".equals(localname)) { |
|
347 |
// System.out.println("W END"); |
|
348 |
|
|
349 |
for (String type : currentTokenAnnotations.keySet()) { |
|
350 |
try { |
|
351 |
writer.writeStartElement("ana"); |
|
352 |
writer.writeAttribute("type", "#" + type); |
|
353 |
writer.writeAttribute("resp", "#" + currentTokenAnnotations.get(type).getAnnotator()); // change |
|
354 |
// resp |
|
355 |
writer.writeCharacters(currentTokenAnnotations.get(type).getValue()); |
|
356 |
writer.writeEndElement(); |
|
357 |
} catch (XMLStreamException e) { |
|
358 |
e.printStackTrace(); |
|
359 |
} |
|
360 |
} |
|
361 |
|
|
362 |
super.processEndElement(); // write word then close annotations |
|
363 |
inW = false; |
|
364 |
// force write All known annotation after the word |
|
365 |
if (currentEndAnnotations != null) {// there are still annotations |
|
366 |
// to write, possible several |
|
367 |
// for a position |
|
368 |
// if (debug) |
|
369 |
// System.out.println("WRITING END at "+position_counter+": "+currentEndAnnotations); |
|
370 |
for (Annotation a : currentEndAnnotations) { |
|
371 |
// if (debug) |
|
372 |
// System.out.println("=============== End annot : "+a); |
|
373 |
if (!"#del".equals(a.getValue())) { |
|
374 |
if (debug) |
|
375 |
System.out.println(" force write end annotation " + a); |
|
376 |
writeEndAnnotation(a); |
|
377 |
}/* |
|
378 |
* else { if (debug) |
|
379 |
* System.out.println(" no need to write end annotation " |
|
380 |
* +a); } |
|
381 |
*/ |
|
382 |
|
|
383 |
} |
|
384 |
} |
|
385 |
|
|
386 |
position_counter++; |
|
387 |
} else if ("ana".equals(localname)) { |
|
388 |
//if (!mustChangeAnaValue) |
|
389 |
super.processEndElement(); |
|
390 |
inAna = false; |
|
391 |
mustChangeAnaValue = false; |
|
392 |
} else if (previousEndAnnotations != null) { // force delete annotations |
|
393 |
// previously written in |
|
394 |
// the "w" case |
|
395 |
// System.out.println("previousEndAnnotations !!!!! "+previousEndAnnotations); |
|
396 |
boolean toDelete = false; |
|
397 |
for (int i = 0; i < previousEndAnnotations.size(); i++) { |
|
398 |
Annotation a = previousEndAnnotations.get(i); |
|
399 |
if (a.getType().equals(localname)) { // update the annotation |
|
400 |
// if (debug) |
|
401 |
// System.out.println(" found existing end annotation "+a); |
|
402 |
|
|
403 |
if ("#del".equals(a.getValue())) { // if |
|
404 |
// (!"#del".equals(a.getValue())) |
|
405 |
// { |
|
406 |
toDelete = true; |
|
407 |
if (debug) |
|
408 |
System.out.println(" force delete end annotation " + a); |
|
409 |
} |
|
410 |
|
|
411 |
previousEndAnnotations.remove(i); |
|
412 |
i--; |
|
413 |
break; // no need to continue |
|
414 |
} |
|
415 |
} |
|
416 |
if (!toDelete) { // the structure or annotation was not modified |
|
417 |
super.processEndElement(); |
|
418 |
} else { |
|
419 |
// if (debug) |
|
420 |
// System.out.println("DELETING END at "+position_counter+" localname="+localname+": "+previousEndAnnotations); |
|
421 |
} |
|
422 |
} else { |
|
423 |
// System.out.println("X END="+localname); |
|
424 |
super.processEndElement(); |
|
425 |
} |
|
426 |
} |
|
427 |
|
|
428 |
private void writeStartAnnotation(Annotation a) { |
|
429 |
try { |
|
430 |
writer.writeStartElement("txm:" + a.getType()); |
|
431 |
writer.writeAttribute("author", "" + a.getAnnotator()); |
|
432 |
writer.writeAttribute("ref", a.getValue()); |
|
433 |
writer.writeAttribute("date", BaseParameters.dateformat.format(new Date())); |
|
434 |
writer.writeAttribute("start", Integer.toString(a.getStart())); |
|
435 |
writer.writeAttribute("end", Integer.toString(a.getEnd())); |
|
436 |
} catch (XMLStreamException e) { |
|
437 |
// TODO Auto-generated catch block |
|
438 |
e.printStackTrace(); |
|
439 |
} |
|
440 |
} |
|
441 |
|
|
442 |
private void writeEndAnnotation(Annotation a) { |
|
443 |
try { |
|
444 |
writer.writeEndElement(); |
|
445 |
} catch (XMLStreamException e) { |
|
446 |
// TODO Auto-generated catch block |
|
447 |
e.printStackTrace(); |
|
448 |
} |
|
449 |
} |
|
450 |
|
|
451 |
//////////// TESTS /////////// |
|
452 |
|
|
453 |
public static ArrayList<Annotation> testAdding(){ |
|
454 |
|
|
455 |
Annotation a1 = new Annotation("Actr", "Actr100", 15, 16); |
|
456 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
457 |
annotations.add(a1); |
|
458 |
return annotations; |
|
459 |
} |
|
460 |
|
|
461 |
public static ArrayList<Annotation> testAddingInclusive(){ |
|
462 |
|
|
463 |
Annotation a1 = new Annotation("CoAc", "CoAc2093", 15, 19); |
|
464 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
465 |
annotations.add(a1); |
|
466 |
return annotations; |
|
467 |
} |
|
468 |
|
|
469 |
public static ArrayList<Annotation> testAddingInclusive2(){ |
|
470 |
|
|
471 |
Annotation a1 = new Annotation("Actr", "Actr100", 15, 16); |
|
472 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
473 |
annotations.add(a1); |
|
474 |
return annotations; |
|
475 |
} |
|
476 |
|
|
477 |
public static ArrayList<Annotation> testUpdating(){ |
|
478 |
|
|
479 |
Annotation a1 = new Annotation("Actr", "Actr200", 15, 16); |
|
480 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
481 |
annotations.add(a1); |
|
482 |
return annotations; |
|
483 |
} |
|
484 |
|
|
485 |
public static ArrayList<Annotation> testUpdatingDeleting(){ |
|
486 |
Annotation a1 = new Annotation("Actr", "Actr200", 15, 16); |
|
487 |
Annotation a2 = new Annotation("CoAc", "#del", 15, 19); |
|
488 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
489 |
annotations.add(a1); |
|
490 |
annotations.add(a2); |
|
491 |
return annotations; |
|
492 |
} |
|
493 |
|
|
494 |
public static ArrayList<Annotation> testAddDeleting2(){ |
|
495 |
Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE |
|
496 |
Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 19); //ADD |
|
497 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
498 |
annotations.add(a1); |
|
499 |
annotations.add(a2); |
|
500 |
return annotations; |
|
501 |
} |
|
502 |
|
|
503 |
public static ArrayList<Annotation> testUpdatingDeletingAdding(){ |
|
504 |
Annotation a1 = new Annotation("Actr", "Actr200", 15, 16); //UPDATE |
|
505 |
Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 19); //ADD |
|
506 |
Annotation a3 = new Annotation("CoAc", "#del", 15, 19); //DELETE |
|
507 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
508 |
annotations.add(a1); |
|
509 |
annotations.add(a2); |
|
510 |
annotations.add(a3); |
|
511 |
return annotations; |
|
512 |
} |
|
513 |
|
|
514 |
public static ArrayList<Annotation> testUpdatingDeletingAdding2(){ // NOT WORKING |
|
515 |
Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE |
|
516 |
Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 19); //ADD same place as CoAc |
|
517 |
Annotation a3 = new Annotation("CoAc", "CoAc2091", 15, 19); //UPDATE |
|
518 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
519 |
annotations.add(a1); |
|
520 |
annotations.add(a2); |
|
521 |
annotations.add(a3); |
|
522 |
return annotations; |
|
523 |
} |
|
524 |
|
|
525 |
public static ArrayList<Annotation> testUpdatingDeletingAdding3(){ // NOT WORKING |
|
526 |
Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE |
|
527 |
Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 16); //ADD same place as Actr |
|
528 |
Annotation a3 = new Annotation("CoAc", "CoAc2091", 15, 19); //UPDATE |
|
529 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
530 |
annotations.add(a1); |
|
531 |
annotations.add(a2); |
|
532 |
annotations.add(a3); |
|
533 |
return annotations; |
|
534 |
} |
|
535 |
|
|
536 |
public static ArrayList<Annotation> testUpdatingDeletingAdding4(){ |
|
537 |
Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE |
|
538 |
Annotation a2 = new Annotation("NaPl", "NaPl14554", 13, 20); //ADD Outside Actr and CoAc |
|
539 |
Annotation a3 = new Annotation("CoAc", "CoAc2091", 15, 19); //UPDATE |
|
540 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
541 |
annotations.add(a1); |
|
542 |
annotations.add(a2); |
|
543 |
annotations.add(a3); |
|
544 |
return annotations; |
|
545 |
} |
|
546 |
|
|
547 |
public static ArrayList<Annotation> testUpdatingDeletingAdding5(){ |
|
548 |
Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE |
|
549 |
Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 16); //ADD |
|
550 |
Annotation a3 = new Annotation("CoAc", "#del", 15, 19); //DELETE |
|
551 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
552 |
annotations.add(a1); |
|
553 |
annotations.add(a2); |
|
554 |
annotations.add(a3); |
|
555 |
return annotations; |
|
556 |
} |
|
557 |
|
|
558 |
public static ArrayList<Annotation> testUpdatingDeletingAdding6(){ |
|
559 |
Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE |
|
560 |
Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 16); //ADD |
|
561 |
Annotation a3 = new Annotation("CoAc", "#del", 15, 19); //DELETE |
|
562 |
Annotation a4 = new Annotation("CoAc", "CoAc1", 15, 18); //DELETE |
|
563 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
564 |
annotations.add(a1); |
|
565 |
annotations.add(a2); |
|
566 |
annotations.add(a3); |
|
567 |
annotations.add(a4); |
|
568 |
return annotations; |
|
569 |
} |
|
570 |
|
|
571 |
public static ArrayList<Annotation> testUpdatingInclusive(){ |
|
572 |
Annotation a1 = new Annotation("Actr", "Actr200", 15, 16); |
|
573 |
Annotation a2 = new Annotation("CoAc", "CoAc321", 15, 19); |
|
574 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
575 |
annotations.add(a1); |
|
576 |
annotations.add(a2); |
|
577 |
return annotations; |
|
578 |
} |
|
579 |
|
|
580 |
public static ArrayList<Annotation> testDeleting(){ |
|
581 |
|
|
582 |
Annotation a1 = new Annotation("Actr", "#del", 15, 16); |
|
583 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
584 |
annotations.add(a1); |
|
585 |
return annotations; |
|
586 |
} |
|
587 |
|
|
588 |
public static ArrayList<Annotation> testDeletingInclusive(){ |
|
589 |
|
|
590 |
Annotation a1 = new Annotation("Actr", "#del", 15, 16); |
|
591 |
Annotation a2 = new Annotation("CoAc", "#del", 15, 16); |
|
592 |
ArrayList<Annotation> annotations = new ArrayList<Annotation>(); |
|
593 |
annotations.add(a1); |
|
594 |
annotations.add(a2); |
|
595 |
return annotations; |
|
596 |
} |
|
597 |
|
|
598 |
static int ADD = 1; |
|
599 |
static int UPDATE = 2; |
|
600 |
static int DELETE = 3; |
|
601 |
static int ADD2 = 4; |
|
602 |
static int UPDATE2 = 5; |
|
603 |
static int DELETE2 = 6; |
|
604 |
static int DELETE3 = 7; |
|
605 |
static int DELETE4 = 8; |
|
606 |
static int ADD3 = 9; |
|
607 |
static int UPDATE3 = 10; |
|
608 |
static int UPDATEDELETE = 11; |
|
609 |
static int UPDATEDELETEADD = 12; |
|
610 |
static int UPDATEDELETEADD2 = 13; |
|
611 |
static int UPDATEDELETEADD3 = 14; |
|
612 |
static int UPDATEDELETEADD4 = 15; |
|
613 |
static int UPDATEDELETEADD5 = 16; |
|
614 |
static int UPDATEDELETEADD6 = 17; |
|
615 |
static int ADDDELETE2 = 18; |
|
616 |
|
|
617 |
public static void main2(String args[]) throws IOException, XMLStreamException { |
|
618 |
File xmlFile = null; |
|
619 |
int corpus_start_position = 10; |
|
620 |
|
|
621 |
ArrayList<Annotation> annotations = null; |
|
622 |
int test = UPDATEDELETEADD3;//AnnotationWriter.UPDATE;// AnnotationWriter.ADD; |
|
623 |
switch (test) { |
|
624 |
case 1 : |
|
625 |
//AnnotationWriter.ADD |
|
626 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST.xml"); |
|
627 |
annotations = testAdding(); /// OK |
|
628 |
break; |
|
629 |
case 2 : |
|
630 |
//AnnotationWriter.UPDATE |
|
631 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST2.xml"); |
|
632 |
annotations = testUpdating(); /// OK |
|
633 |
break; |
|
634 |
case 3 : |
|
635 |
//AnnotationWriter.DELETE |
|
636 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST4.xml"); |
|
637 |
annotations = testDeleting(); /// OK |
|
638 |
break; |
|
639 |
case 4 : |
|
640 |
//AnnotationWriter.ADD2 |
|
641 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST3.xml"); |
|
642 |
annotations = testAddingInclusive(); /// OK |
|
643 |
break; |
|
644 |
case 5 : |
|
645 |
//AnnotationWriter.UPDATE2 |
|
646 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
647 |
annotations = testUpdating(); /// OK |
|
648 |
break; |
|
649 |
case 6 : |
|
650 |
//AnnotationWriter.DELETE2 |
|
651 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST6.xml"); |
|
652 |
annotations = testDeleting(); |
|
653 |
break; |
|
654 |
case 7 : |
|
655 |
//AnnotationWriter.DELETE3 |
|
656 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST6.xml"); |
|
657 |
annotations = testDeletingInclusive(); |
|
658 |
break; |
|
659 |
case 8 : |
|
660 |
//AnnotationWriter.DELETE4 |
|
661 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST7.xml"); |
|
662 |
annotations = testDeletingInclusive(); |
|
663 |
break; |
|
664 |
case 9 : |
|
665 |
//AnnotationWriter.ADD3 |
|
666 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST3bis.xml"); |
|
667 |
annotations = testAddingInclusive2(); /// OK |
|
668 |
break; |
|
669 |
case 10 : |
|
670 |
//AnnotationWriter.UPDATE3 |
|
671 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
672 |
annotations = testUpdatingInclusive(); /// OK |
|
673 |
break; |
|
674 |
case 11 : |
|
675 |
//AnnotationWriter.UPDATEDELETE |
|
676 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
677 |
annotations = testUpdatingDeleting(); /// OK |
|
678 |
break; |
|
679 |
case 12 : |
|
680 |
//AnnotationWriter.UPDATEDELETEADD |
|
681 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
682 |
annotations = testUpdatingDeletingAdding(); /// NOT OK |
|
683 |
break; |
|
684 |
case 13 : |
|
685 |
//AnnotationWriter.UPDATEDELETEADD2 |
|
686 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
687 |
annotations = testUpdatingDeletingAdding2(); /// OK |
|
688 |
break; |
|
689 |
case 14 : |
|
690 |
//AnnotationWriter.UPDATEDELETEADD3 |
|
691 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
692 |
annotations = testUpdatingDeletingAdding3(); /// OK |
|
693 |
break; |
|
694 |
case 15 : |
|
695 |
//AnnotationWriter.UPDATEDELETEADD4 -- add 1 annotation over the 2 others |
|
696 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
697 |
annotations = testUpdatingDeletingAdding4(); /// OK |
|
698 |
break; |
|
699 |
case 16 : |
|
700 |
//AnnotationWriter.UPDATEDELETEADD5 -- delete 2 annotations, add 1 |
|
701 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
702 |
annotations = testUpdatingDeletingAdding5(); /// OK |
|
703 |
break; |
|
704 |
case 17 : |
|
705 |
//AnnotationWriter.UPDATEDELETEADD6 -- delete 2 annotations, add 2 |
|
706 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
707 |
annotations = testUpdatingDeletingAdding6(); /// NOT OK |
|
708 |
break; |
|
709 |
case 18 : |
|
710 |
//AnnotationWriter.ADDDELETE -- delete 1, add 1 |
|
711 |
xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml"); |
|
712 |
annotations = testAddDeleting2(); /// OK |
|
713 |
break; |
|
714 |
// |
|
715 |
default: break; |
|
716 |
} |
|
717 |
|
|
718 |
// no token annotations |
|
719 |
AnnotationInjector annotationInjector = new AnnotationInjector(xmlFile, annotations, new ArrayList<Annotation>(), corpus_start_position, true); |
|
720 |
File outfile = new File(xmlFile.getParentFile(), "result-"+xmlFile.getName()); |
|
721 |
if (annotationInjector.process(outfile)) { |
|
722 |
if (!ValidateXml.test(outfile)) { |
|
723 |
System.out.println("FAIL"); |
|
724 |
} else { |
|
725 |
System.out.println("SUCCESS ??"); |
|
726 |
} |
|
727 |
} |
|
728 |
|
|
729 |
} |
|
730 |
|
|
731 |
public static void main(String[] args) { |
|
732 |
try { |
|
733 |
File xmlFile = new File("/home/mdecorde/TXM/corpora/TDM80J/txm/TDM80J/tdm80j.xml"); |
|
734 |
File outfile = new File("/home/mdecorde/TXM/corpora/TDM80J/txm/TDM80J/tdm80j-annot.xml"); |
|
735 |
|
|
736 |
ArrayList<Annotation> segmentAnnotations = new ArrayList<Annotation>(); |
Formats disponibles : Unified diff