Révision 344

tmp/org.txm.annotation.core/.settings/org.eclipse.jdt.core.prefs (revision 344)
1
eclipse.preferences.version=1
2
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
4
org.eclipse.jdt.core.compiler.compliance=1.6
5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
6
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
7
org.eclipse.jdt.core.compiler.source=1.6
0 8

  
tmp/org.txm.annotation.core/.classpath (revision 344)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
5
	<classpathentry kind="src" path="src"/>
6
	<classpathentry kind="output" path="bin"/>
7
</classpath>
0 8

  
tmp/org.txm.annotation.core/META-INF/MANIFEST.MF (revision 344)
1
Manifest-Version: 1.0
2
Bundle-ManifestVersion: 2
3
Bundle-Name: Annotation
4
Bundle-SymbolicName: org.txm.annotation.core
5
Bundle-Version: 1.0.0.qualifier
6
Bundle-Activator: org.txm.annotation.Activator
7
Require-Bundle: org.eclipse.ui,
8
 org.eclipse.core.runtime,
9
 org.txm.core;bundle-version="0.7.0"
10
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
11
Bundle-ActivationPolicy: lazy
12
Export-Package: org.txm.annotation,
13
 org.txm.annotation.conversion,
14
 org.txm.annotation.repository,
15
 org.txm.annotation.storage.temporary
0 16

  
tmp/org.txm.annotation.core/.project (revision 344)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>org.txm.annotation.core</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.pde.PluginNature</nature>
26
		<nature>org.eclipse.jdt.core.javanature</nature>
27
	</natures>
28
</projectDescription>
0 29

  
tmp/org.txm.annotation.core/src/org/txm/annotation/AnnotationSyMoGIHWriter.java (revision 344)
1
package org.txm.annotation;
2

  
3
import java.io.File;
4
import java.io.FileWriter;
5
import java.io.IOException;
6
import java.net.MalformedURLException;
7
import java.util.ArrayList;
8
import java.util.HashMap;
9
import java.util.HashSet;
10
import java.util.List;
11

  
12
import javax.xml.stream.XMLInputFactory;
13
import javax.xml.stream.XMLOutputFactory;
14
import javax.xml.stream.XMLStreamException;
15
import javax.xml.stream.XMLStreamWriter;
16

  
17
import org.txm.annotation.repository.AnnotationType;
18
import org.txm.importer.StaxIdentityParser;
19

  
20
/**
21
 * The Class AnnotationSyMoGIHWriter.
22
 *
23
 * @author sgedzelman, mdecorde
24
 *
25
 * Copies an XML-TXM file without its annotation elements
26
 * and creates one XML-TEI-SyMoGIH stand-off annotation file per annotation author
27
 * 
28
 */
29
public class AnnotationSyMoGIHWriter extends StaxIdentityParser {
30

  
31
	File xmlStandOffDirectory;
32
	boolean debug = false;
33

  
34
	String currentRef ;
35
	String currentAuthor ;
36
	String currentDate;
37
	String currentStartPos ;
38
	String currentEndPos ;
39
	//read xmlFile, to find annotations and update/write to xmlstandofffile
40
	//order annotations by annotator
41
	////// order annotations by date
42
	HashSet<String> types = new HashSet<String>();
43
	ArrayList<String> positions;
44
	HashMap<String, ArrayList<String>> annotationsPositions;
45
	XMLStreamWriter currentWriter;
46
	XMLStreamWriter standoffWriter;
47
	String currentType;
48
	boolean startAnnotation = false;
49
	private String filename;
50

  
51
	// author -> date -> annotation_values
52
	HashMap<String, HashMap<String, ArrayList<String[]>>> allannotations = new HashMap<String, HashMap<String, ArrayList<String[]>>>();
53
	HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>> allannotationspositions = new HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>>();
54
	private String textid;
55

  
56
	/**
	 * Builds a writer that reads one XML-TXM text file and keeps track of the
	 * annotation element names to look for.
	 *
	 * @param textid the text identifier, written as the title of the stand-off files
	 * @param xmlFile the XML-TXM file to read; its name is reused to name the stand-off files
	 * @param xmlStandOffDirectory the directory in which the stand-off files are created
	 * @param types the annotation types; the id of each type is the local name of an annotation element
	 * @param debug the debug flag stored in this writer
	 * @throws IOException
	 * @throws XMLStreamException
	 */
	public AnnotationSyMoGIHWriter(String textid, File xmlFile, File xmlStandOffDirectory, List<AnnotationType> types, boolean debug) throws IOException, XMLStreamException {
		super(xmlFile.toURI().toURL()); // init reader and writer

		// remember the context of this run
		this.textid = textid;
		this.filename = xmlFile.getName();
		this.debug = debug;
		this.xmlStandOffDirectory = xmlStandOffDirectory;

		// parsing state
		factory = XMLInputFactory.newInstance();
		annotationsPositions = new HashMap<String, ArrayList<String>>();

		// annotation elements are recognized by the id of their type
		for (AnnotationType annotationType : types) {
			String typeId = annotationType.getId();
			this.types.add(typeId);
		}
	}
81

  
82
	/*<TEI xmlns="http://www.tei-c.org/ns/1.0">
83
    <teiHeader>
84
        <fileDesc>
85
            <titleStmt>
86
                <title>Title</title>
87
            </titleStmt>
88
            <publicationStmt>
89
                <p>Publication Information</p>
90
            </publicationStmt>
91
            <sourceDesc>
92
                <p>Ce document permet l'annotation sémantique de tous les textes concernant l'association avec des unités de connaissance</p>
93
            </sourceDesc>
94
        </fileDesc>
95
    </teiHeader>
96
    <text>
97
        <body>
98
            <div>
99
                <div>
100
                    <!-- La date dans le header indique la date d'annotation -->
101
                    <head>
102
                        <date type="annotation_date" when="2016-06-16"/>
103
                    </head>
104
                    <span type="identification d'entités nommées" ana="CoAc56389" 
105
                          target="#w_article_baip_1254-0714_1850_num_01_005_974_tei_2152 
106
                                  #w_article_baip_1254-0714_1850_num_01_005_974_tei_2153 
107
                                  #w_article_baip_1254-0714_1850_num_01_005_974_tei_2154 
108
                                  #w_article_baip_1254-0714_1850_num_01_005_974_tei_2155 
109
                                  #w_article_baip_1254-0714_1850_num_01_005_974_tei_2156" />
110
                </div>
111
            </div>
112
         </body>
113
      </text>
114
 </TEI>*/
115

  
116
	protected void processStartElement() throws XMLStreamException, IOException {
117
		//<coac author="gazelledess" ref="CoAc397" start="5" end="5">
118

  
119
		/*if(localname.startsWith("actr")){
120
			System.out.println("Check existence of actr in Corpus !!!! "+localname);
121
		}*/
122
		boolean foundAnnot = false;
123

  
124
		
125
		if (types.contains(localname) && parser.getPrefix().equals("txm")) { // don't write txm annotation elements
126
			//System.out.println(" START "+ localname);
127
			foundAnnot = true;
128
			currentType = localname;
129
			//<txm:actr author="gazelledess" ref="PhileasFogg" date="2016-09-05" start="56" end="57">
130
			currentAuthor = parser.getAttributeValue(null, "author");
131
			currentRef = parser.getAttributeValue(null, "ref");
132
			currentStartPos = parser.getAttributeValue(null, "start");
133
			currentEndPos = parser.getAttributeValue(null, "end");
134
			currentDate = parser.getAttributeValue(null, "date");
135
			//annotation is here
136
			startAnnotation = true;
137
			positions = new ArrayList<String>();
138
			annotationsPositions.put(currentType, positions);
139

  
140
			// initialize allannotations
141
			if (!allannotations.containsKey(currentAuthor)) {
142
				allannotations.put(currentAuthor, new HashMap<String, ArrayList<String[]>>());
143
				allannotationspositions.put(currentAuthor, new HashMap<String, ArrayList<ArrayList<String>>>());
144
			}
145
			HashMap<String, ArrayList<String[]>> authorsAnnotation = allannotations.get(currentAuthor);
146
			HashMap<String, ArrayList<ArrayList<String>>> authorsAnnotationPositions = allannotationspositions.get(currentAuthor);
147
			if (!authorsAnnotation.containsKey(currentDate)) {
148
				authorsAnnotation.put(currentDate, new ArrayList<String[]>());
149
				authorsAnnotationPositions.put(currentDate, new ArrayList<ArrayList<String>>());
150
			}
151

  
152
			storeAnnotation();
153
		}
154

  
155
		if (!foundAnnot) {
156
			super.processStartElement(); /// continue writing in file all elements, except the tags that are now in stand-off files
157

  
158
			// get words ids of the current annotations
159
			if (localname.equals("w") && startAnnotation) {
160
				String id = parser.getAttributeValue(null, "id");
161
				for (String typeIn : annotationsPositions.keySet()) {
162
					positions = annotationsPositions.get(typeIn);
163
					positions.add(id);
164
					//System.out.println("Positions of w id="+posW+" for ["+typeIn+"] ");
165
				}
166
			}
167
		}
168
	}
169

  
170

  
171
	/**
172
	 * ends the current author stand-off file
173
	 * @param currentWriter
174
	 */
175
	private void writeEndStandOffFile(XMLStreamWriter currentWriter){
176
		//System.out.println("writeEndStandOffFile ...");
177

  
178
		try {
179
			currentWriter.writeEndElement(); 
180
			currentWriter.writeEndDocument();
181

  
182
			currentWriter.flush();
183
			currentWriter.close();
184
		} catch (XMLStreamException e) {
185
			e.printStackTrace();
186
		}
187

  
188
	}
189

  
190
	/**
191
	 * Create the stand-off file for one author
192
	 * @param file
193
	 * @return
194
	 */
195
	private XMLStreamWriter writeStartStandOffFile(File file){
196
		//System.out.println("writeStartStandOffFile ...");
197

  
198
		String ns = "http://www.tei-c.org/ns/1.0";    
199
		XMLOutputFactory output = XMLOutputFactory.newInstance();
200
		XMLStreamWriter writer = null ;
201
		try {
202
			writer = output.createXMLStreamWriter(new FileWriter(file));
203
			writer.writeStartDocument();
204
			writer.setPrefix("tei", ns);
205
			writer.setDefaultNamespace(ns);
206

  
207
			writer.writeStartElement("TEI");
208

  
209
			writer.writeStartElement("teiHeader");
210
			writer.writeStartElement("fileDesc");
211

  
212
			writer.writeStartElement("titleStmt");
213
			writer.writeStartElement("title");
214
			writer.writeCharacters(textid); 
215
			writer.writeEndElement(); // title
216
			writer.writeEndElement(); // titleStmt
217

  
218
			writer.writeStartElement("publicationStmt");
219
			writer.writeStartElement("p");
220
			writer.writeCharacters("PUBLICATION INFO à renseigner");
221
			writer.writeEndElement(); // p
222
			writer.writeEndElement(); // publicationStmt
223

  
224
			writer.writeStartElement("sourceDesc");
225
			writer.writeStartElement("p");
226
			writer.writeCharacters("Ce document permet l'annotation sémantique de tous les textes, par auteur");
227
			writer.writeEndElement(); // p
228
			writer.writeEndElement(); // sourceDesc
229

  
230
			writer.writeEndElement(); // </fileDesc>
231
			writer.writeStartElement("encodingDesc");
232
			writer.writeStartElement("projectDesc");
233
			writer.writeStartElement("p");
234
			writer.writeCharacters("Annotations created by "+currentAuthor+", for the use in Symogih XML platform");
235
			writer.writeEndElement(); // p
236
			writer.writeEndElement(); // </projectDesc>
237
			writer.writeEndElement(); // </encodingDesc>
238
			writer.writeEndElement(); // </teiHeader>
239

  
240

  
241
			writer.writeStartElement("text");
242
			writer.writeStartElement("body");
243
			writer.writeCharacters("\n");
244
			writer.writeStartElement("div");
245
			writer.writeCharacters("\n");
246
		} catch (XMLStreamException e) {
247
			// TODO Auto-generated catch block
248
			e.printStackTrace();
249
		} catch (IOException e) {
250
			// TODO Auto-generated catch block
251
			e.printStackTrace();
252
		}
253
		return writer;
254
	}
255

  
256
	private void storeAnnotation() {
257
		allannotations.get(currentAuthor).get(currentDate).add(new String[]{currentDate, currentRef, currentType});
258
	}
259

  
260
	private void storeAnnotationPositions() {
261
		allannotationspositions.get(currentAuthor).get(currentDate).add(positions);
262
	}
263

  
264
	/**
265
	 * write stand-off annotation
266
	 */
267
	private void writeStartAnnotationToStandoffFile(String[] data, ArrayList<String> positions) {
268
		//System.out.println("writeStartAnnotationToStandoffFile ...");
269
		try {
270
			currentWriter.writeStartElement("span");
271
			currentWriter.writeAttribute("type","named_entities_identifications");
272
			currentWriter.writeAttribute("ana", data[1]);
273
			StringBuffer listWids = new StringBuffer();
274
			for (String posW : positions) {
275
				listWids.append("#"+posW+" ");
276
			}
277
			currentWriter.writeAttribute("target", listWids.toString().trim());
278
			currentWriter.writeComment("type="+data[2]);
279
			currentWriter.writeEndElement(); // span
280
			currentWriter.writeCharacters("\n");
281

  
282
		} catch (XMLStreamException e) {
283
			// TODO Auto-generated catch block
284
			e.printStackTrace();
285
		}
286
	}
287

  
288
	protected void processEndElement() throws XMLStreamException {
289
		boolean foundAnnot = false;
290
		if (types.contains(localname) && parser.getPrefix().equals("txm")) { // skip annotation end element
291
			//System.out.println(" END "+ localname);
292
			foundAnnot = true;
293
			//annotation ends here
294
			storeAnnotationPositions();
295

  
296
			if (annotationsPositions.containsKey(localname)) {
297
				annotationsPositions.remove(localname);
298
			}
299

  
300
			startAnnotation = false;
301
		}
302

  
303
		if (!foundAnnot) {
304
			if (localname.equals("TEI")) {
305
				try {
306
					for (String author : allannotations.keySet()) {
307
						System.out.println("    author="+author);
308
						File currentXmlFile = new File(xmlStandOffDirectory, filename.substring(0, filename.length()-4)+"_"+currentAuthor+"_annotations.xml");
309

  
310
						// write start of the stand-off file
311
						currentWriter = writeStartStandOffFile(currentXmlFile);
312

  
313
						// write annotations grouped per date
314
						HashMap<String, ArrayList<String[]>> dates = allannotations.get(author);
315
						HashMap<String, ArrayList<ArrayList<String>>> datesPositions = allannotationspositions.get(author);
316
						for (String date : datesPositions.keySet()) {
317
							ArrayList<String[]> datas = dates.get(date);
318
							ArrayList<ArrayList<String>> positions = datesPositions.get(date);
319

  
320
							currentWriter.writeStartElement("div");
321
							currentWriter.writeCharacters("\n");
322
							currentWriter.writeStartElement("head");
323
							currentWriter.writeStartElement("date");
324
							currentWriter.writeAttribute("type","annotation_date");
325
							currentWriter.writeAttribute("when", date); 
326
							currentWriter.writeEndElement(); // date
327
							currentWriter.writeEndElement(); // head
328
							currentWriter.writeCharacters("\n");
329

  
330
							for (int i = 0 ; i < datas.size() ; i++) {
331
								writeStartAnnotationToStandoffFile(datas.get(i), positions.get(i));
332
							}
333

  
334
							currentWriter.writeEndElement(); // div
335
							currentWriter.writeCharacters("\n");
336
						}
337

  
338
						// write the end of the stand-off file
339
						writeEndStandOffFile(currentWriter);
340
					}
341
				}
342
				catch(Exception e){
343
					System.out.println("Current writer "+currentWriter);
344
				}
345
			}
346
			super.processEndElement();
347
		}
348
	}
349

  
350
	public static void main(String strs[]){
351
		try {
352
		File xmlFile = new File("/home/mdecorde/TXM/corpora/TDM80J/txm/TDM80J", "tdm80j.xml");
353
		File outfile = new File(xmlFile.getParentFile(), "tdm80j-tei.xml");
354

  
355
		File xmlStandOffDirectory = xmlFile.getParentFile();
356
		List<AnnotationType> types = new ArrayList<AnnotationType>();
357
		types.add(new AnnotationType("local","actr_id", "actr"));
358
		AnnotationSyMoGIHWriter annotStdff;
359
		try {
360
			annotStdff = new AnnotationSyMoGIHWriter("TDBM80J", xmlFile, xmlStandOffDirectory, types, true);
361
			annotStdff.process(outfile);
362
		} catch (MalformedURLException e) {
363
			// TODO Auto-generated catch block
364
			e.printStackTrace();
365
		}
366
		} catch(Exception e) {
367
			e.printStackTrace();
368
		}
369
	}
370
}
0 371

  
tmp/org.txm.annotation.core/src/org/txm/annotation/AnnotationWriter.java (revision 344)
1
package org.txm.annotation;
2

  
3
import java.io.File;
4
import java.io.FileInputStream;
5
import java.io.FileOutputStream;
6
import java.io.IOException;
7
import java.io.InputStream;
8
import java.io.OutputStream;
9
import java.util.ArrayList;
10
import java.util.HashMap;
11
import java.util.List;
12
import java.util.logging.Level;
13

  
14
import javax.xml.stream.XMLStreamException;
15

  
16
import org.apache.commons.lang.StringUtils;
17
import org.txm.Toolbox;
18
import org.txm.annotation.repository.AnnotationEffect;
19
import org.txm.annotation.repository.AnnotationType;
20
import org.txm.annotation.repository.KnowledgeRepository;
21
import org.txm.core.preferences.TBXPreferences;
22
import org.txm.importer.ValidateXml;
23
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
24
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException;
25
import org.txm.searchengine.cqp.corpus.MainCorpus;
26
import org.txm.searchengine.cqp.serverException.CqiServerError;
27
import org.txm.stat.utils.ConsoleProgressBar;
28
import org.txm.utils.DeleteDir;
29
import org.txm.utils.logger.Log;
30
import org.txm.utils.zip.Zip;
31
import org.w3c.dom.Document;
32
import org.w3c.dom.Node;
33

  
34

  
35
public class AnnotationWriter {
36

  
37
	MainCorpus corpus;
38
	private List<AnnotationType> types;
39
	private KnowledgeRepository defaultKR;
40

  
41
	/**
	 * Creates a writer for a corpus. The first knowledge repository declared by
	 * the corpus is used as default repository and provides the annotation types.
	 *
	 * @param corpus the corpus whose annotations are written
	 */
	public AnnotationWriter(MainCorpus corpus){
		this.corpus = corpus;
		this.defaultKR = corpus.getKnowledgeRepository(corpus.getKnowledgeRepositoryNames().get(0));
		this.types = this.defaultKR.getAllAnnotationTypes();
	}
47

  
48
	/**
49
	 * process a text to build standoff files
50
	 * 
51
	 * @param textid
52
	 * @param currentXMLFile
53
	 * @param currentXMLStandoffFile
54
	 * @param xmlStandOffDirectory
55
	 * @return
56
	 * @throws IOException
57
	 * @throws CqiServerError
58
	 * @throws CqiClientException
59
	 * @throws InvalidCqpIdException
60
	 * @throws XMLStreamException 
61
	 */
62
	protected boolean writeTextAnnotationToSyMoGIH(String textid, File currentXMLFile, File currentXMLStandoffFile, File xmlStandOffDirectory) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{
63
		System.out.println("  text="+textid);
64
		boolean show_debug = Log.getLevel().intValue() < Level.WARNING.intValue();
65
		
66
		AnnotationSyMoGIHWriter annotationstdoff = new AnnotationSyMoGIHWriter(textid, currentXMLFile, xmlStandOffDirectory, types, show_debug);
67
		
68
		///rather test on the new xml standoff files
69
		if (annotationstdoff.process(currentXMLStandoffFile)) {
70
			if (ValidateXml.test(currentXMLStandoffFile)) { //TODO ALSO check if annotations are well-written 
71
				return true;
72
			} else {
73
				System.out.println("Error: result file "+currentXMLStandoffFile+" is malformed.");
74
			}
75
		} else {
76
			System.out.println("Error: while processing "+currentXMLStandoffFile+" in standoff dir");
77
		}
78
		return false;
79
		
80
	}
81
	
82
	/**
83
	 * Writing annotations in standoff files for each text of the corpus
84
	 * 
85
	 * @param allCorpusAnnotations
86
	 * @return
87
	 * @throws IOException
88
	 * @throws CqiServerError
89
	 * @throws CqiClientException
90
	 * @throws InvalidCqpIdException
91
	 * @throws XMLStreamException 
92
	 */
93
	public boolean writeAnnotationsInStandoff(File resultZipFile) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{
94
		
95
		List<String> textsIds = corpus.getTextsID();
96
		System.out.println("Exporting annotations of "+StringUtils.join(textsIds, ", ")+".");
97

  
98
		File resultDirectory = new File(Toolbox.getParam(TBXPreferences.USER_TXM_HOME), "results/"+corpus.getName()+"_annotations");
99
		DeleteDir.deleteDirectory(resultDirectory);
100
		resultDirectory.mkdirs();
101
		if (!(resultDirectory.exists() && resultDirectory.canWrite())) {
102
			System.out.println("ERROR: could not create/write temporary directory: "+resultDirectory);
103
			return false;
104
		}
105
		
106
		File inputDirectory = corpus.getBaseDirectory();
107
		File corpusTxmDirectory = new File(inputDirectory, "txm/"+corpus.getName());
108
		
109
		System.out.println("Writing annotations XML files in "+resultDirectory);
110
		for (String textid : textsIds) { 
111
			File currentXMLFile = new File(corpusTxmDirectory, textid+".xml");
112
			if (!currentXMLFile.exists()) {
113
				System.out.println("Error: cannot found XML file for text with id="+textid);
114
				return false;
115
			}
116
			File currentXMLStandoffFile = new File(resultDirectory, textid+".xml"); //To Be Changed ?
117
			if (!writeTextAnnotationToSyMoGIH(textid, currentXMLFile, currentXMLStandoffFile, resultDirectory )) {
118
						System.out.println("Error while writing annotations of text "+currentXMLStandoffFile);
119
						return false;
120
			}
121
		}
122
		
123
		Zip.compress(resultDirectory, resultZipFile, new ConsoleProgressBar(1));
124
		DeleteDir.deleteDirectory(resultDirectory);
125
		
126
		System.out.println("Annotations saved in "+resultZipFile.getAbsolutePath());
127
		return resultZipFile.exists();
128
	}
129
	
130
	/**
131
	 * 
132
	 * @param allCorpusAnnotations ordered annotations
133
	 * @return
134
	 * @throws IOException
135
	 * @throws CqiServerError
136
	 * @throws CqiClientException
137
	 * @throws InvalidCqpIdException
138
	 * @throws XMLStreamException 
139
	 */
140
	public boolean writeAnnotations(List<Annotation> allCorpusAnnotations) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{
141
		//MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(c.getName());
142
		System.out.println("Saving "+allCorpusAnnotations.size()+"annotations...");
143

  
144
		int[] end_limits = corpus.getTextEndLimits();
145
		int[] start_limits = corpus.getTextStartLimits();
146
		List<String> textsIds = corpus.getTextsID();
147

  
148
		File inputDirectory = corpus.getBaseDirectory();
149
		File txmDirectory = new File(inputDirectory, "txm/"+corpus.getName());
150

  
151
		ArrayList<Annotation> textAnnotations = new ArrayList<Annotation>();
152
		HashMap<String, ArrayList<Annotation>> annotationsPerTexts = new HashMap<String, ArrayList<Annotation>>();
153

  
154
		int currentText = 0;
155
		File currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
156

  
157
		//store first text
158
		currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
159
		textAnnotations = new ArrayList<Annotation>();
160
		annotationsPerTexts.put(textsIds.get(currentText), textAnnotations);
161

  
162
		// group annotations per text
163
		for (Annotation currentAnnot : allCorpusAnnotations) { // parse all annotations
164
			//System.out.println(" Annotation: "+currentAnnot);
165
			int pos = currentAnnot.getPK().getEndPosition();
166

  
167
			while (pos > end_limits[currentText]) { // while pos is not in the currentText.end 
168
				currentText++;
169
				currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
170
				textAnnotations = new ArrayList<Annotation>();
171
				annotationsPerTexts.put(textsIds.get(currentText), textAnnotations);
172
			}
173

  
174
			textAnnotations.add(currentAnnot);
175
		}
176

  
177
		File tmpXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_tmp");
178
		DeleteDir.deleteDirectory(tmpXMLTXMDirectory);
179
		tmpXMLTXMDirectory.mkdirs();
180
		if (!(tmpXMLTXMDirectory.exists() && tmpXMLTXMDirectory.canWrite())) {
181
			System.out.println("ERROR: could not create directory: "+tmpXMLTXMDirectory);
182
			return false;
183
		}
184

  
185
		File previousXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_previous");
186
		//DeleteDir.deleteDirectory(tmpXMLTXMDirectory);
187
		previousXMLTXMDirectory.mkdirs();
188
		if (!(previousXMLTXMDirectory.exists()  && previousXMLTXMDirectory.canWrite())) {
189
			System.out.println("ERROR: could not create directory: "+previousXMLTXMDirectory);
190
			return false;
191
		}
192

  
193
		System.out.println("Annotations grouped per text for "+annotationsPerTexts.size()+" text"+(annotationsPerTexts.size() > 0?"s":""));
194
		System.out.println(" - Writing temporary XML files in: "+tmpXMLTXMDirectory);
195
		System.out.println(" - Copying previous version of XML files in: "+previousXMLTXMDirectory);
196
		// for all annotation PER TEXT, update the XML-TXM files
197
		currentText = 0;
198
		while (currentText < end_limits.length ) { // end limits : 10, 30, 45, 55, 103
199
			currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
200
			ArrayList<Annotation> allAnnotations = annotationsPerTexts.get(textsIds.get(currentText));
201
			if (allAnnotations != null && allAnnotations.size() > 0) {
202
				ArrayList<Annotation> allSegmentAnnotations = new ArrayList<Annotation>();
203
				ArrayList<Annotation> allTokenAnnotations = new ArrayList<Annotation>();
204

  
205
				KnowledgeRepository defaultKR = corpus.getKnowledgeRepository(corpus.getKnowledgeRepositoryNames().get(0));
206
				System.out.println("Using KR="+defaultKR);
207
				for (Annotation a : allAnnotations) {
208
					AnnotationType type = defaultKR.getType(a.getType());
209
					if (type != null) {
210
						if (AnnotationEffect.SEGMENT.equals(type.getEffect())) {
211
							allSegmentAnnotations.add(a);
212
						} else if (AnnotationEffect.TOKEN.equals(type.getEffect())) {
213
							allTokenAnnotations.add(a);
214
						} else {
215
							System.out.println("Annotation "+a+" with type="+a.getType()+" not found in default KR="+defaultKR);
216
						}
217
					} else {
218
						System.out.println("Warning: unknowed type: "+a.getType());
219
					}
220
				}
221

  
222
				if (!writeAnnotationsInFile(currentXMLFile, start_limits[currentText], 
223
						allSegmentAnnotations, allTokenAnnotations, 
224
						tmpXMLTXMDirectory, previousXMLTXMDirectory)) {
225
					System.out.println("Error while writing annotations of text "+currentXMLFile);
226
					return false;
227
				}
228
			}
229
			currentText++;
230
		}
231

  
232
		return true;
233
	}		
234

  
235
	//writeAnnotationInStandoffFile(currentXMLStandoffFile, allAnnotations, annotator, tmpXMLTXMDirectory, previousXMLTXMDirectory))
236
	
237
	protected boolean writeAnnotationsInFile(File xmlFile, int text_start_position, 
238
			ArrayList<Annotation> segmentAnnotations, ArrayList<Annotation> tokenAnnotations, File tmpXMLTXMDirectory, File previousXMLTXMDirectory) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException, XMLStreamException{
239
		System.out.println("Writing annotations for text "+xmlFile+" segment annotations="+segmentAnnotations.size()+" token annotations="+tokenAnnotations.size());
240
		System.out.println(segmentAnnotations);
241
		System.out.println(tokenAnnotations);
242

  
243
		boolean show_debug = Log.getLevel().intValue() < Level.WARNING.intValue();
244
		AnnotationInjector annotationInjector = new AnnotationInjector(xmlFile, segmentAnnotations, tokenAnnotations, text_start_position, show_debug);
245

  
246
		File tmpfile = new File(tmpXMLTXMDirectory, xmlFile.getName());
247
		File previousfile = new File(previousXMLTXMDirectory, xmlFile.getName());
248

  
249
		if (annotationInjector.process(tmpfile)) {
250
			if (ValidateXml.test(tmpfile)) { //TODO ALSO check if annotations are well-written 
251
				previousfile.delete(); // in case there is one
252
				if (!previousfile.exists() && xmlFile.renameTo(previousfile)) {
253
					tmpfile.renameTo(xmlFile);
254
					return true;
255
				} else {
256
					System.out.println("Error: could not replace XML-TXM file: "+xmlFile+" with "+tmpfile);
257
				}
258
			} else {
259
				System.out.println("Error: result file "+tmpfile+" is malformed.");
260
			}
261
		} else {
262
			System.out.println("Error: while processing "+xmlFile+" in temp dir");
263
		}
264
		return false;
265
	}
266
}
0 267

  
tmp/org.txm.annotation.core/src/org/txm/annotation/AnnotationInjector.java (revision 344)
1
package org.txm.annotation;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.net.MalformedURLException;
6
import java.util.ArrayList;
7
import java.util.Collections;
8
import java.util.Comparator;
9
import java.util.Date;
10
import java.util.HashMap;
11
import java.util.List;
12

  
13
import javax.xml.stream.XMLInputFactory;
14
import javax.xml.stream.XMLStreamException;
15

  
16
import org.txm.importer.StaxIdentityParser;
17
import org.txm.importer.ValidateXml;
18
import org.txm.objects.BaseParameters;
19

  
20
/**
21
 * The Class AnnotationInjector.
22
 *
23
 * @author mdecorde
24
 *
25
 * inject annotation from a stand-off file into a xml-tei-txm file "id"
26
 */
27

  
28
public class AnnotationInjector extends StaxIdentityParser {
29

  
30
	File xmlFile;
31
	HashMap<Integer, List<Annotation>> annotationsToAddByStartPos; // contains
32
																	// annotation
33
																	// to write
34
	HashMap<Integer, List<Annotation>> annotationsToAddByEndPos;
35
	List<Annotation> currentStartAnnotations;
36
	List<Annotation> currentEndAnnotations;
37
	HashMap<String, Annotation> currentTokenAnnotations = new HashMap<String, Annotation>();
38

  
39
	int n = 0;
40
	boolean debug = false;
41

  
42
	String data; // the word id properties to add/replace
43
	String newform = null;
44
	String wordId;
45
	boolean inW = false;
46
	String anaType;
47

  
48
	int position_counter = 0;
49

  
50
	String closeNext = null; // the next close tag to delete
51
	ArrayList<String> openedElements = new ArrayList<String>(); // to ensure to
52
																// delete the
53
																// right element
54

  
55
	private List<Annotation> previousEndAnnotations;
56
	private ArrayList<Annotation> tokenAnnotations;
57

  
58
	String user;
59

  
60
	/**
	 * Creates an injector for one XML-TXM file and indexes the annotations to write.
	 *
	 * Segment annotations are grouped twice: by start position and by end position.
	 * Groups sharing a start position are sorted by decreasing end position and
	 * groups sharing an end position are sorted by decreasing start position, so
	 * that the smaller annotations are written inside the bigger ones.
	 *
	 * @param xmlFile the XML-TXM file to read
	 * @param segmentAnnotations the segment annotations to add, update or delete
	 * @param tokenAnnotations the token annotations to write; this list is sorted
	 *            in place by start position and kept by reference (entries are
	 *            removed from it while the file is processed)
	 * @param corpus_start_position initial value of the word position counter
	 * @param debug true to print the prepared annotations and progress messages
	 * @throws XMLStreamException
	 * @throws IOException
	 */
	public AnnotationInjector(File xmlFile, List<Annotation> segmentAnnotations, ArrayList<Annotation> tokenAnnotations, int corpus_start_position, boolean debug) throws IOException, XMLStreamException {
		super(xmlFile.toURI().toURL()); // init reader and writer

		this.user = System.getProperty("user.name");

		this.debug = debug;
		this.xmlFile = xmlFile;
		this.n = 0;
		this.position_counter = corpus_start_position;
		factory = XMLInputFactory.newInstance();

		// preparing annotations to being written in the right inclusion order
		annotationsToAddByStartPos = new HashMap<Integer, List<Annotation>>();
		annotationsToAddByEndPos = new HashMap<Integer, List<Annotation>>();

		for (Annotation a : segmentAnnotations) {
			List<Annotation> sameStart = annotationsToAddByStartPos.get(a.getStart());
			if (sameStart == null) {
				sameStart = new ArrayList<Annotation>();
				annotationsToAddByStartPos.put(a.getStart(), sameStart);
			}
			List<Annotation> sameEnd = annotationsToAddByEndPos.get(a.getEnd());
			if (sameEnd == null) {
				sameEnd = new ArrayList<Annotation>();
				annotationsToAddByEndPos.put(a.getEnd(), sameEnd);
			}

			sameStart.add(a);
			sameEnd.add(a);
		}

		// Comparisons go through Integer.compareTo rather than a subtraction,
		// which could overflow and produce a wrong sign.
		Comparator<Annotation> byDecreasingEnd = new Comparator<Annotation>() {
			@Override
			public int compare(Annotation arg0, Annotation arg1) {
				return Integer.valueOf(arg1.getEnd()).compareTo(Integer.valueOf(arg0.getEnd()));
			}
		};
		Comparator<Annotation> byDecreasingStart = new Comparator<Annotation>() {
			@Override
			public int compare(Annotation arg0, Annotation arg1) {
				return Integer.valueOf(arg1.getStart()).compareTo(Integer.valueOf(arg0.getStart()));
			}
		};
		Comparator<Annotation> byIncreasingStart = new Comparator<Annotation>() {
			@Override
			public int compare(Annotation arg0, Annotation arg1) {
				return Integer.valueOf(arg0.getStart()).compareTo(Integer.valueOf(arg1.getStart()));
			}
		};

		// reverse sort annotation -> write the smaller in the bigger
		for (List<Annotation> sameStart : annotationsToAddByStartPos.values()) {
			Collections.sort(sameStart, byDecreasingEnd);
		}
		for (List<Annotation> sameEnd : annotationsToAddByEndPos.values()) {
			Collections.sort(sameEnd, byDecreasingStart);
		}

		// sorting token annotations
		this.tokenAnnotations = tokenAnnotations;
		Collections.sort(tokenAnnotations, byIncreasingStart);

		if (debug) {
			System.out.println("annotations for " + xmlFile);
			System.out.println(" segment grouped and ordered by start position: " + annotationsToAddByStartPos);
			System.out.println(" segment grouped and ordered by end position: " + annotationsToAddByEndPos);
			System.out.println(" token ordered by start position: " + tokenAnnotations);
		}
	}
140

  
141
	boolean mustChangeAnaValue = false;
142
	boolean inAna = false;
143

  
144
	protected void processStartElement() throws XMLStreamException, IOException {
145
		
146
		currentStartAnnotations = annotationsToAddByStartPos.get(position_counter);
147
		currentEndAnnotations = annotationsToAddByEndPos.get(position_counter);
148
		//System.out.println("A Starts: "+currentStartAnnotations+" "+currentEndAnnotations);
149
		// if (debug)
150
		// System.out.println("----- "+localname+"@"+position_counter+" START ANNOT = "+currentStartAnnotations);
151
		if ("teiHeader".equals(localname)) {
152
			super.processStartElement();
153
			goToEnd("teiHeader"); // skip teiHeader // will be closed in
154
									// processEndElement()
155
		} else if ("w".equals(localname)) {
156
			// System.out.println("W START pos="+position_counter+" annots="+currentStartAnnotations);
157
			if (currentStartAnnotations != null) {// there are still annotations
158
													// to write, possible
159
													// several for a position
160
				// if (debug)
161
				// System.out.println("WRITING START at "+position_counter+": "+currentStartAnnotations);
162
				for (Annotation a : currentStartAnnotations) {
163
					if (!"#del".equals(a.getValue())) {
164
						if (debug)
165
							System.out.println(" force write start annotation " + a);
166
						writeStartAnnotation(a);
167
					} else {
168
						if (debug)
169
							System.out.println(" no need to write start annotation " + a);
170
						currentEndAnnotations = annotationsToAddByEndPos.get(a.getEnd());
171
						// System.out.println(" shall we need to change annotation attribute : "+a.getValue()+
172
						// " | currentEndAnnotations : "+currentEndAnnotations);
173
						if (currentEndAnnotations == null) {
174
							System.out.println("WARNING ERROR null pointer for end position of annotation " + a);
175
						} else {
176
							currentEndAnnotations.remove(a);
177
						}
178
					}
179
				}
180
			}
181
			inW = true;
182

  
183
			// get token annotation if any
184
			currentTokenAnnotations.clear(); // current word annotations to write, may be empty
185
			Annotation a = null;
186
			
187
			if (tokenAnnotations.size() > 0) {
188
				a = tokenAnnotations.get(0);
189
				//System.out.println("for p="+position_counter+" next token annot="+a);
190
				
191
				if (a.getStart() <= position_counter && position_counter <= a.getEnd()) {
192
					// write the annotation
193
					currentTokenAnnotations.put(a.getType(), a);
194
					if (a.getEnd() == position_counter) // its the last word that needs to write this annotation
195
						tokenAnnotations.remove(0);
196
				}
197
			}
198

  
199
			super.processStartElement(); // write the tag
200
		} else if ("ana".equals(localname)) {
201
			inAna = true;
202
			String type = parser.getAttributeValue(null, "type").substring(1);
203
			if (currentTokenAnnotations.containsKey(type)) {
204
				Annotation a = currentTokenAnnotations.get(type);
205
				//System.out.println("Updating token annotation with: " + a);
206

  
207
				String value = a.getValue();
208
				if ("#del".equals(value))
209
					value = "";
210
				if (value == null)
211
					value = "ERROR";
212

  
213
				try {
214
					writer.writeStartElement("ana");
215
					writer.writeAttribute("type", "#" + type);
216
					writer.writeAttribute("resp", "#" + a.getAnnotator()); // change
217
																			// resp
218
					writer.writeCharacters(value);
219
					mustChangeAnaValue = true;
220
				} catch (XMLStreamException e) {
221
					// TODO Auto-generated catch block
222
					e.printStackTrace();
223
				}
224

  
225
				currentTokenAnnotations.remove(type); // annotation updated
226
			} else {
227
				super.processStartElement();
228
			}
229
		} else if (currentStartAnnotations != null) { // is it an annotations ?
230
			// System.out.println("A START="+localname);
231
			boolean toDelete = false;
232
			for (int i = 0; i < currentStartAnnotations.size(); i++) {
233
				Annotation a = currentStartAnnotations.get(i);
234
				// if (debug)
235
				// System.out.println("=============== Start annot : "+a);
236
				if (a.getType().equals(localname)) { // force delete, will be
237
														// written in the "w"
238
														// case
239
					int start = Integer.parseInt(parser.getAttributeValue(null, "start"));
240
					int end = Integer.parseInt(parser.getAttributeValue(null, "end"));
241
					if (a.getStart() == start && a.getEnd() == end) {
242
						// updated = true;
243
						// if (debug)
244
						// System.out.println(" found existing annotation "+a);
245
						if ("#del".equals(a.getValue())) {
246
							toDelete = true;
247
							// currentEndAnnotations.remove(a); // MAYBE NOT TO
248
							// DO THIS HERE ?
249
							if (debug)
250
								System.out.println(" force delete start annotation " + a);
251
						} else { // update existing annotation, no need to store
252
									// the end of annotation
253
							writeStartAnnotation(a);
254
							toDelete = true;
255
							if (debug)
256
								System.out.println(" update annotation " + a);
257
							currentEndAnnotations = annotationsToAddByEndPos.get(a.getEnd());// EL
258
																								// NO
259
																								// CAPITO
260
																								// YET
261
																								// !!
262
							// System.out.println(" shall we need to change annotation attribute : "+a.getValue()+
263
							// " | currentEndAnnotations : "+currentEndAnnotations);
264
							if (currentEndAnnotations == null) {
265
								System.out.println("WARNING ERROR null pointer for end position of annotation " + a);
266
							} else {
267
								currentEndAnnotations.remove(a);
268
							}
269
						}
270

  
271
						currentStartAnnotations.remove(i);
272
						i--;
273
					}
274

  
275
					break; // no need to continue
276
				} else {
277
					// System.out.println("------- with same start pos");
278
					String s_end = parser.getAttributeValue(null, "end");
279
					String s_start = parser.getAttributeValue(null, "start");
280
					String s_author = parser.getAttributeValue(null, "author");
281
					String s_ref = parser.getAttributeValue(null, "ref");
282
					if (s_start != null && s_end != null && s_author != null && s_ref != null) {
283
						int end = Integer.parseInt(s_end);
284
						if (a.getEnd() >= end) { // must write a
285
							if (!"#del".equals(a.getValue())) {
286
								if (debug)
287
									System.out.println(" writing of start annotation " + a);
288
								writeStartAnnotation(a);
289

  
290
							} else {
291
								toDelete = true;
292
								if (debug)
293
									System.out.println(" no writing of start annotation " + a);
294
								currentEndAnnotations = annotationsToAddByEndPos.get(a.getEnd());
295
								// System.out.println(" shall we need to change annotation attribute : "+a.getValue()+
296
								// " | currentEndAnnotations : "+currentEndAnnotations);
297
								if (currentEndAnnotations == null) {
298
									System.out.println("WARNING ERROR null pointer for end position of annotation " + a);
299
								} else {
300
									currentEndAnnotations.remove(a);
301
								}
302
							}
303
							currentStartAnnotations.remove(i);
304
							i--;
305
							// break;
306
						}
307
					}
308
				}
309
			}
310
			if (!toDelete) { // the structure or annotation was not modified
311
				super.processStartElement();
312
			}
313

  
314
		} else {
315
			// System.out.println("X START="+localname);
316
			super.processStartElement();
317
		}
318
	}
319

  
320
	@Override
321
	public void processCharacters() throws XMLStreamException {
322
		//System.out.println("processCharaters inAna="+inAna+" mustChangeAnaValue="+mustChangeAnaValue);
323
		if (inAna && mustChangeAnaValue) {
324
			// nothing content is already written
325
			//System.out.println("skip ana value because we replace it");
326
		} else {
327
			super.processCharacters();
328
		}
329
	}
330

  
331
	protected void processEndElement() throws XMLStreamException {
332
		// currentEndAnnotations =
333
		// annotationsToAddByEndPos.get(position_counter); // annotation to end
334
		previousEndAnnotations = annotationsToAddByEndPos.get(position_counter - 1); // existing
335
																						// (or
336
																						// not)
337
																						// annotation
338
																						// that
339
																						// have
340
																						// already
341
																						// been
342
																						// closed
343
		// if (debug)
344
		// System.out.println("----- "+localname+"@"+position_counter+" END ANNOT = "+currentEndAnnotations);
345

  
346
		if ("w".equals(localname)) {
347
			// System.out.println("W END");
348

  
349
			for (String type : currentTokenAnnotations.keySet()) {
350
				try {
351
					writer.writeStartElement("ana");
352
					writer.writeAttribute("type", "#" + type);
353
					writer.writeAttribute("resp", "#" + currentTokenAnnotations.get(type).getAnnotator()); // change
354
																											// resp
355
					writer.writeCharacters(currentTokenAnnotations.get(type).getValue());
356
					writer.writeEndElement();
357
				} catch (XMLStreamException e) {
358
					e.printStackTrace();
359
				}
360
			}
361

  
362
			super.processEndElement(); // write word then close annotations
363
			inW = false;
364
			// force write All known annotation after the word
365
			if (currentEndAnnotations != null) {// there are still annotations
366
												// to write, possible several
367
												// for a position
368
				// if (debug)
369
				// System.out.println("WRITING END at "+position_counter+": "+currentEndAnnotations);
370
				for (Annotation a : currentEndAnnotations) {
371
					// if (debug)
372
					// System.out.println("=============== End annot : "+a);
373
					if (!"#del".equals(a.getValue())) {
374
						if (debug)
375
							System.out.println(" force write end annotation " + a);
376
						writeEndAnnotation(a);
377
					}/*
378
					 * else { if (debug)
379
					 * System.out.println(" no need to write end annotation "
380
					 * +a); }
381
					 */
382

  
383
				}
384
			}
385

  
386
			position_counter++;
387
		} else if ("ana".equals(localname)) {
388
			//if (!mustChangeAnaValue)
389
				super.processEndElement();
390
			inAna = false;
391
			mustChangeAnaValue = false;
392
		} else if (previousEndAnnotations != null) { // force delete annotations
393
														// previously written in
394
														// the "w" case
395
			// System.out.println("previousEndAnnotations !!!!! "+previousEndAnnotations);
396
			boolean toDelete = false;
397
			for (int i = 0; i < previousEndAnnotations.size(); i++) {
398
				Annotation a = previousEndAnnotations.get(i);
399
				if (a.getType().equals(localname)) { // update the annotation
400
					// if (debug)
401
					// System.out.println(" found existing end annotation "+a);
402

  
403
					if ("#del".equals(a.getValue())) { // if
404
														// (!"#del".equals(a.getValue()))
405
														// {
406
						toDelete = true;
407
						if (debug)
408
							System.out.println(" force delete end annotation " + a);
409
					}
410

  
411
					previousEndAnnotations.remove(i);
412
					i--;
413
					break; // no need to continue
414
				}
415
			}
416
			if (!toDelete) { // the structure or annotation was not modified
417
				super.processEndElement();
418
			} else {
419
				// if (debug)
420
				// System.out.println("DELETING END at "+position_counter+" localname="+localname+": "+previousEndAnnotations);
421
			}
422
		} else {
423
			// System.out.println("X END="+localname);
424
			super.processEndElement();
425
		}
426
	}
427

  
428
	private void writeStartAnnotation(Annotation a) {
429
		try {
430
			writer.writeStartElement("txm:" + a.getType());
431
			writer.writeAttribute("author", "" + a.getAnnotator());
432
			writer.writeAttribute("ref", a.getValue());
433
			writer.writeAttribute("date", BaseParameters.dateformat.format(new Date()));
434
			writer.writeAttribute("start", Integer.toString(a.getStart()));
435
			writer.writeAttribute("end", Integer.toString(a.getEnd()));
436
		} catch (XMLStreamException e) {
437
			// TODO Auto-generated catch block
438
			e.printStackTrace();
439
		}
440
	}
441

  
442
	private void writeEndAnnotation(Annotation a) {
443
		try {
444
			writer.writeEndElement();
445
		} catch (XMLStreamException e) {
446
			// TODO Auto-generated catch block
447
			e.printStackTrace();
448
		}
449
	}
450
	
451
//////////// TESTS ///////////
452
	
453
	public static ArrayList<Annotation>  testAdding(){
454

  
455
		Annotation a1 = new Annotation("Actr", "Actr100", 15, 16);
456
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
457
		annotations.add(a1);
458
		return annotations;
459
	}
460

  
461
	public static ArrayList<Annotation>  testAddingInclusive(){
462

  
463
		Annotation a1 = new Annotation("CoAc", "CoAc2093", 15, 19);
464
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
465
		annotations.add(a1);
466
		return annotations;
467
	}
468

  
469
	public static ArrayList<Annotation>  testAddingInclusive2(){
470

  
471
		Annotation a1 = new Annotation("Actr", "Actr100", 15, 16);
472
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
473
		annotations.add(a1);
474
		return annotations;
475
	}
476

  
477
	public static ArrayList<Annotation>  testUpdating(){
478

  
479
		Annotation a1 = new Annotation("Actr", "Actr200", 15, 16);
480
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
481
		annotations.add(a1);
482
		return annotations;
483
	}
484

  
485
	public static ArrayList<Annotation>  testUpdatingDeleting(){	
486
		Annotation a1 = new Annotation("Actr", "Actr200", 15, 16);
487
		Annotation a2 = new Annotation("CoAc", "#del", 15, 19);		
488
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
489
		annotations.add(a1);
490
		annotations.add(a2);
491
		return annotations;
492
	}
493

  
494
	public static ArrayList<Annotation>  testAddDeleting2(){	
495
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
496
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 19); //ADD
497
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
498
		annotations.add(a1);
499
		annotations.add(a2);
500
		return annotations;
501
	}
502

  
503
	public static ArrayList<Annotation>  testUpdatingDeletingAdding(){	
504
		Annotation a1 = new Annotation("Actr", "Actr200", 15, 16); //UPDATE
505
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 19); //ADD	
506
		Annotation a3 = new Annotation("CoAc", "#del", 15, 19);	//DELETE	
507
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
508
		annotations.add(a1);
509
		annotations.add(a2);
510
		annotations.add(a3);
511
		return annotations;
512
	}
513

  
514
	public static ArrayList<Annotation>  testUpdatingDeletingAdding2(){	// NOT WORKING
515
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
516
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 19); //ADD  same place as CoAc
517
		Annotation a3 = new Annotation("CoAc", "CoAc2091", 15, 19); //UPDATE		
518
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
519
		annotations.add(a1);
520
		annotations.add(a2);
521
		annotations.add(a3);
522
		return annotations;
523
	}
524

  
525
	public static ArrayList<Annotation>  testUpdatingDeletingAdding3(){	// NOT WORKING
526
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
527
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 16); //ADD same place as Actr		
528
		Annotation a3 = new Annotation("CoAc", "CoAc2091", 15, 19); //UPDATE		
529
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
530
		annotations.add(a1);
531
		annotations.add(a2);
532
		annotations.add(a3);
533
		return annotations;
534
	}
535

  
536
	public static ArrayList<Annotation>  testUpdatingDeletingAdding4(){	
537
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
538
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 13, 20); //ADD Outside Actr and CoAc		
539
		Annotation a3 = new Annotation("CoAc", "CoAc2091", 15, 19); //UPDATE		
540
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
541
		annotations.add(a1);
542
		annotations.add(a2);
543
		annotations.add(a3);
544
		return annotations;
545
	}
546

  
547
	public static ArrayList<Annotation>  testUpdatingDeletingAdding5(){	
548
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
549
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 16); //ADD		
550
		Annotation a3 = new Annotation("CoAc", "#del", 15, 19); //DELETE		
551
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
552
		annotations.add(a1);
553
		annotations.add(a2);
554
		annotations.add(a3);
555
		return annotations;
556
	}
557

  
558
	public static ArrayList<Annotation>  testUpdatingDeletingAdding6(){	 
559
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
560
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 16); //ADD		
561
		Annotation a3 = new Annotation("CoAc", "#del", 15, 19); //DELETE		
562
		Annotation a4 = new Annotation("CoAc", "CoAc1", 15, 18); //DELETE
563
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
564
		annotations.add(a1);
565
		annotations.add(a2);
566
		annotations.add(a3);
567
		annotations.add(a4);
568
		return annotations;
569
	}
570

  
571
	public static ArrayList<Annotation>  testUpdatingInclusive(){	
572
		Annotation a1 = new Annotation("Actr", "Actr200", 15, 16);
573
		Annotation a2 = new Annotation("CoAc", "CoAc321", 15, 19);		
574
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
575
		annotations.add(a1);
576
		annotations.add(a2);
577
		return annotations;
578
	}
579

  
580
	public static ArrayList<Annotation>  testDeleting(){
581

  
582
		Annotation a1 = new Annotation("Actr", "#del", 15, 16);
583
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
584
		annotations.add(a1);
585
		return annotations;
586
	}
587

  
588
	public static ArrayList<Annotation>  testDeletingInclusive(){
589

  
590
		Annotation a1 = new Annotation("Actr", "#del", 15, 16);
591
		Annotation a2 = new Annotation("CoAc", "#del", 15, 16);		
592
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
593
		annotations.add(a1);
594
		annotations.add(a2);
595
		return annotations;
596
	}
597

  
598
	static int ADD = 1;
599
	static int UPDATE = 2;
600
	static int DELETE = 3;
601
	static int ADD2 = 4;
602
	static int UPDATE2 = 5;
603
	static int DELETE2 = 6;
604
	static int DELETE3 = 7;
605
	static int DELETE4 = 8;
606
	static int ADD3 = 9;
607
	static int UPDATE3 = 10;
608
	static int UPDATEDELETE = 11;
609
	static int UPDATEDELETEADD = 12;
610
	static int UPDATEDELETEADD2 = 13;
611
	static int UPDATEDELETEADD3 = 14;
612
	static int UPDATEDELETEADD4 = 15;
613
	static int UPDATEDELETEADD5 = 16;
614
	static int UPDATEDELETEADD6 = 17;
615
	static int ADDDELETE2 = 18;
616

  
617
	public static void main2(String args[]) throws IOException, XMLStreamException {
618
		File xmlFile = null;
619
		int corpus_start_position = 10;
620

  
621
		ArrayList<Annotation> annotations = null;
622
		int test = UPDATEDELETEADD3;//AnnotationWriter.UPDATE;// AnnotationWriter.ADD;
623
		switch (test) {
624
		case 1 : 
625
			//AnnotationWriter.ADD
626
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST.xml");
627
			annotations = testAdding(); /// OK
628
			break;
629
		case 2 : 
630
			//AnnotationWriter.UPDATE
631
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST2.xml");
632
			annotations = testUpdating(); /// OK
633
			break;
634
		case 3 : 
635
			//AnnotationWriter.DELETE
636
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST4.xml");
637
			annotations = testDeleting(); /// OK
638
			break;
639
		case 4 : 
640
			//AnnotationWriter.ADD2
641
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST3.xml");
642
			annotations = testAddingInclusive(); /// OK
643
			break;
644
		case 5 : 
645
			//AnnotationWriter.UPDATE2
646
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
647
			annotations = testUpdating(); /// OK
648
			break;
649
		case 6 : 
650
			//AnnotationWriter.DELETE2
651
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST6.xml");
652
			annotations = testDeleting();
653
			break;
654
		case 7 : 
655
			//AnnotationWriter.DELETE3
656
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST6.xml");
657
			annotations = testDeletingInclusive();
658
			break;
659
		case 8 : 
660
			//AnnotationWriter.DELETE4
661
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST7.xml");
662
			annotations = testDeletingInclusive();
663
			break;
664
		case 9 : 
665
			//AnnotationWriter.ADD3
666
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST3bis.xml");
667
			annotations = testAddingInclusive2(); /// OK
668
			break;
669
		case 10 : 
670
			//AnnotationWriter.UPDATE3
671
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
672
			annotations = testUpdatingInclusive(); /// OK
673
			break;
674
		case 11 : 
675
			//AnnotationWriter.UPDATEDELETE
676
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
677
			annotations = testUpdatingDeleting(); /// OK
678
			break;
679
		case 12 : 
680
			//AnnotationWriter.UPDATEDELETEADD
681
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
682
			annotations = testUpdatingDeletingAdding(); /// NOT OK
683
			break;
684
		case 13 : 
685
			//AnnotationWriter.UPDATEDELETEADD2
686
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
687
			annotations = testUpdatingDeletingAdding2(); /// OK
688
			break;
689
		case 14 : 
690
			//AnnotationWriter.UPDATEDELETEADD3
691
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
692
			annotations = testUpdatingDeletingAdding3(); /// OK
693
			break;
694
		case 15 : 
695
			//AnnotationWriter.UPDATEDELETEADD4 -- add 1 annotation over the 2 others
696
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
697
			annotations = testUpdatingDeletingAdding4(); /// OK
698
			break;
699
		case 16 : 
700
			//AnnotationWriter.UPDATEDELETEADD5 -- delete 2 annotations, add 1
701
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
702
			annotations = testUpdatingDeletingAdding5(); /// OK
703
			break;
704
		case 17 : 
705
			//AnnotationWriter.UPDATEDELETEADD6 -- delete 2 annotations, add 2
706
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
707
			annotations = testUpdatingDeletingAdding6(); /// NOT OK
708
			break;
709
		case 18 : 
710
			//AnnotationWriter.ADDDELETE -- delete 1, add 1
711
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
712
			annotations = testAddDeleting2(); /// OK 
713
			break;
714
			//
715
		default: break;
716
		}
717

  
718
		// no token annotations
719
		AnnotationInjector annotationInjector = new AnnotationInjector(xmlFile, annotations, new ArrayList<Annotation>(), corpus_start_position, true);
720
		File outfile = new File(xmlFile.getParentFile(), "result-"+xmlFile.getName());
721
		if (annotationInjector.process(outfile)) {
722
			if (!ValidateXml.test(outfile)) {
723
				System.out.println("FAIL");
724
			} else {
725
				System.out.println("SUCCESS ??");
726
			}	
727
		}
728

  
729
	}
730

  
731
	public static void main(String[] args) {
732
		try {
733
		File xmlFile = new File("/home/mdecorde/TXM/corpora/TDM80J/txm/TDM80J/tdm80j.xml");
734
		File outfile = new File("/home/mdecorde/TXM/corpora/TDM80J/txm/TDM80J/tdm80j-annot.xml");
735

  
736
		ArrayList<Annotation> segmentAnnotations = new ArrayList<Annotation>();
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff