Revision 850

tmp/org.txm.annotation.kr.core/.classpath (revision 850)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins">
5
		<accessrules>
6
			<accessrule kind="accessible" pattern="**"/>
7
		</accessrules>
8
	</classpathentry>
9
	<classpathentry kind="src" path="src"/>
10
	<classpathentry kind="lib" path="libs/hsqldb.jar"/>
11
	<classpathentry kind="lib" path="libs/postgresql-9.4.1207.jre6.jar"/>
12
	<classpathentry kind="lib" path="libs/sqlite-jdbc-3.8.11.2.jar"/>
13
	<classpathentry kind="output" path="bin"/>
14
</classpath>
0 15

  
tmp/org.txm.annotation.kr.core/META-INF/persistence.xml (revision 850)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<persistence version="2.1" xmlns="http://xmlns.jcp.org/xml/ns/persistence" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/persistence http://xmlns.jcp.org/xml/ns/persistence/persistence_2_1.xsd">	
3
		<persistence-unit name="HSQLKRPERSISTENCE" transaction-type="RESOURCE_LOCAL">
4
		<provider>org.eclipse.persistence.jpa.PersistenceProvider</provider>
5
		
6
		<class>org.txm.annotation.kr.core.Annotation</class>
7
		<class>org.txm.annotation.kr.core.repository.AnnotationType</class>
8
		<class>org.txm.annotation.kr.core.repository.TypedValue</class>
9
<!--	
10
		<class>org.txm.functions.dictionary_jpa.EntryId</class>
11
		<class>org.txm.functions.dictionary_jpa.EntryProperty</class>
12
		<class>org.txm.functions.dictionary_jpa.EntryPropertyType</class>
13
-->
14
		<properties>
15
			<property name="javax.persistence.jdbc.driver" value="org.hsqldb.jdbcDriver"/>
16
			<property name="javax.persistence.jdbc.url" value="jdbc:hsqldb:file:txmdb/db;shutdown=true;hsqldb.write_delay=false;hsqldb.lock_file=false;hsqldb.applog=0;hsqldb.sqllog=0"/>
17
			<property name="javax.persistence.jdbc.user" value="SA"/>
18
			
19
<!-- 			<property name="eclipselink.logging.parameters" value="false"/>
20
 			<property name="eclipselink.logging.level" value="ON" />
21
 			
22
		
23
			<property name="eclipselink.jdbc.read-connections.min" value="1" />
24
			<property name="eclipselink.jdbc.write-connections.min" value="1" />
25
			<property name="eclipselink.jdbc.batch-writing" value="JDBC" />
26
-->
27

  
28
<!-- Logging 
29
			<property name="eclipselink.logging.file" value="output.log"/>
30
			<property name="eclipselink.logging.logger" value="JavaLogger"/>
31
-->
32
		
33
<!-- 		
34
			<property name="eclipselink.logging.level" value="FINE" />
35
			<property name="eclipselink.logging.timestamp" value="false" />
36
			<property name="eclipselink.logging.session" value="false" />
37
			<property name="eclipselink.logging.thread" value="false" /> 
38
-->
39
			
40
			<!-- <property name="eclipselink.ddl-generation" value="drop-and-create-tables"/> -->
41
			<!-- <property name="eclipselink.ddl-generation.output-mode" value="database"/> -->
42
		</properties>
43
 
44
	</persistence-unit>
45
</persistence>
0 46

  
tmp/org.txm.annotation.kr.core/META-INF/MANIFEST.MF (revision 850)
1
Manifest-Version: 1.0
2
Require-Bundle: org.txm.core;bundle-version="0.7.0";visibility:=reexport,
3
 org.txm.searchengine.cqp.core;visibility:=reexport,
4
 org.txm.utils;bundle-version="1.0.0";visibility:=reexport,
5
 javax.persistence;bundle-version="2.1.0";visibility:=reexport,
6
 org.eclipse.persistence.asm;bundle-version="3.3.1";visibility:=reexport,
7
 org.eclipse.persistence.jpa;bundle-version="2.6.0";visibility:=reexport,
8
 org.eclipse.persistence.jpa.jpql;bundle-version="2.6.0";visibility:=reexport,
9
 org.eclipse.persistence.antlr;bundle-version="3.2.0";visibility:=reexport,
10
 org.eclipse.osgi;bundle-version="3.10.2";visibility:=reexport,
11
 org.txm.searchengine.core;bundle-version="1.0.0";visibility:=reexport,
12
 org.eclipse.ui.workbench;visibility:=reexport,
13
 org.txm.annotation.core;visibility:=reexport
14
Export-Package: org.hsqldb,
15
 org.hsqldb.auth,
16
 org.hsqldb.dbinfo,
17
 org.hsqldb.error,
18
 org.hsqldb.index,
19
 org.hsqldb.jdbc,
20
 org.hsqldb.jdbc.pool,
21
 org.hsqldb.lib,
22
 org.hsqldb.lib.java,
23
 org.hsqldb.lib.tar,
24
 org.hsqldb.map,
25
 org.hsqldb.navigator,
26
 org.hsqldb.persist,
27
 org.hsqldb.resources,
28
 org.hsqldb.result,
29
 org.hsqldb.rights,
30
 org.hsqldb.rowio,
31
 org.hsqldb.scriptio,
32
 org.hsqldb.server,
33
 org.hsqldb.types,
34
 org.hsqldb.util,
35
 org.postgresql,
36
 org.postgresql.copy,
37
 org.postgresql.core,
38
 org.postgresql.core.v2,
39
 org.postgresql.core.v3,
40
 org.postgresql.ds,
41
 org.postgresql.ds.common,
42
 org.postgresql.fastpath,
43
 org.postgresql.geometric,
44
 org.postgresql.gss,
45
 org.postgresql.hostchooser,
46
 org.postgresql.jdbc,
47
 org.postgresql.jdbc2,
48
 org.postgresql.jdbc2.optional,
49
 org.postgresql.jdbc3,
50
 org.postgresql.largeobject,
51
 org.postgresql.osgi,
52
 org.postgresql.ssl,
53
 org.postgresql.ssl.jdbc4,
54
 org.postgresql.sspi,
55
 org.postgresql.translation,
56
 org.postgresql.util,
57
 org.postgresql.xa,
58
 org.sqlite,
59
 org.sqlite.core,
60
 org.sqlite.date,
61
 org.sqlite.javax,
62
 org.sqlite.jdbc3,
63
 org.sqlite.jdbc4,
64
 org.sqlite.util,
65
 org.txm.annotation.kr.core,
66
 org.txm.annotation.kr.core.conversion,
67
 org.txm.annotation.kr.core.preferences,
68
 org.txm.annotation.kr.core.repository,
69
 org.txm.annotation.kr.core.storage.temporary,
70
 org.txm.annotation.kr.core.temporary
71
Bundle-ActivationPolicy: lazy
72
Bundle-ClassPath: .,libs/hsqldb.jar,libs/postgresql-9.4.1207.jre6.jar,
73
 libs/sqlite-jdbc-3.8.11.2.jar
74
Bundle-Version: 1.0.0.qualifier
75
Bundle-Name: org.txm.annotation.kr.core
76
Bundle-ManifestVersion: 2
77
Bundle-Activator: org.txm.annotation.kr.core.Activator
78
Bundle-SymbolicName: org.txm.annotation.kr.core;singleton:=true
79
Meta-Persistence: META-INF/persistence.xml
80
Bundle-RequiredExecutionEnvironment: JavaSE-1.7
81

  
0 82

  
tmp/org.txm.annotation.kr.core/.project (revision 850)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>org.txm.annotation.kr.core</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.pde.PluginNature</nature>
26
		<nature>org.eclipse.jdt.core.javanature</nature>
27
	</natures>
28
</projectDescription>
0 29

  
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationInjector.java (revision 850)
1
package org.txm.annotation.kr.core;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.net.MalformedURLException;
6
import java.util.ArrayList;
7
import java.util.Collections;
8
import java.util.Comparator;
9
import java.util.Date;
10
import java.util.HashMap;
11
import java.util.List;
12

  
13
import javax.xml.stream.XMLInputFactory;
14
import javax.xml.stream.XMLStreamException;
15

  
16
import org.txm.importer.StaxIdentityParser;
17
import org.txm.importer.ValidateXml;
18
import org.txm.objects.BaseParameters;
19

  
20
/**
21
 * The Class AnnotationInjector.
22
 *
23
 * @author mdecorde
24
 *
25
 * Injects annotations from a stand-off file into an xml-tei-txm file "id".
26
 */
27

  
28
public class AnnotationInjector extends StaxIdentityParser {
29

  
30
	File xmlFile;
31
	HashMap<Integer, List<Annotation>> annotationsToAddByStartPos; // contains
32
																	// annotation
33
																	// to write
34
	HashMap<Integer, List<Annotation>> annotationsToAddByEndPos;
35
	List<Annotation> currentStartAnnotations;
36
	List<Annotation> currentEndAnnotations;
37
	HashMap<String, Annotation> currentTokenAnnotations = new HashMap<String, Annotation>();
38

  
39
	int n = 0;
40
	boolean debug = false;
41

  
42
	String data; // the word id properties to add/replace
43
	String newform = null;
44
	String wordId;
45
	boolean inW = false;
46
	String anaType;
47

  
48
	int position_counter = 0;
49

  
50
	String closeNext = null; // the next close tag to delete
51
	ArrayList<String> openedElements = new ArrayList<String>(); // to ensure to
52
																// delete the
53
																// right element
54

  
55
	private List<Annotation> previousEndAnnotations;
56
	private ArrayList<Annotation> tokenAnnotations;
57

  
58
	String user;
59

  
60
	/**
	 * Creates an injector for one XML file and prepares the annotations to write.
	 *
	 * Segment annotations are grouped by start position and by end position.
	 * Each start group is ordered by descending end position and each end group
	 * by descending start position, so that the smaller annotations get written
	 * inside the bigger ones. Token annotations are sorted by ascending start
	 * position.
	 *
	 * @param xmlFile the XML file to process; its URL is given to the parent parser
	 * @param segmentAnnotations the segment annotations to group by start and end position
	 * @param tokenAnnotations the token annotations; this list is kept by reference
	 *            and sorted in place (other methods of this class also remove
	 *            elements from it)
	 * @param corpus_start_position initial value of the word position counter
	 * @param debug if true, the prepared annotations are printed on standard output
	 * @throws IOException declared by the signature; presumably raised by the parent constructor
	 * @throws XMLStreamException declared by the signature; presumably raised by the parent constructor
	 */
	public AnnotationInjector(File xmlFile, List<Annotation> segmentAnnotations, ArrayList<Annotation> tokenAnnotations, int corpus_start_position, boolean debug) throws IOException, XMLStreamException {
		super(xmlFile.toURI().toURL()); // init reader and writer

		this.user = System.getProperty("user.name");

		this.debug = debug;
		this.xmlFile = xmlFile;
		this.n = 0;
		this.position_counter = corpus_start_position;
		factory = XMLInputFactory.newInstance();

		// preparing annotations to being written in the right inclusion order
		annotationsToAddByStartPos = new HashMap<Integer, List<Annotation>>();
		annotationsToAddByEndPos = new HashMap<Integer, List<Annotation>>();

		for (Annotation a : segmentAnnotations) {
			List<Annotation> sameStart = annotationsToAddByStartPos.get(a.getStart());
			if (sameStart == null) {
				sameStart = new ArrayList<Annotation>();
				annotationsToAddByStartPos.put(a.getStart(), sameStart);
			}
			List<Annotation> sameEnd = annotationsToAddByEndPos.get(a.getEnd());
			if (sameEnd == null) {
				sameEnd = new ArrayList<Annotation>();
				annotationsToAddByEndPos.put(a.getEnd(), sameEnd);
			}

			sameStart.add(a);
			sameEnd.add(a);
		}

		// reverse sort: annotations sharing a start position are ordered by
		// descending end -> write the smaller in the bigger.
		// Integer.compare is used instead of a subtraction, which can overflow.
		Comparator<Annotation> byDescendingEnd = new Comparator<Annotation>() {
			@Override
			public int compare(Annotation arg0, Annotation arg1) {
				return Integer.compare(arg1.getEnd(), arg0.getEnd());
			}
		};
		for (List<Annotation> group : annotationsToAddByStartPos.values()) {
			Collections.sort(group, byDescendingEnd);
		}

		// reverse sort: annotations sharing an end position are ordered by
		// descending start
		Comparator<Annotation> byDescendingStart = new Comparator<Annotation>() {
			@Override
			public int compare(Annotation arg0, Annotation arg1) {
				return Integer.compare(arg1.getStart(), arg0.getStart());
			}
		};
		for (List<Annotation> group : annotationsToAddByEndPos.values()) {
			Collections.sort(group, byDescendingStart);
		}

		// sorting token annotations by ascending start position
		this.tokenAnnotations = tokenAnnotations;
		Collections.sort(tokenAnnotations, new Comparator<Annotation>() {
			@Override
			public int compare(Annotation arg0, Annotation arg1) {
				return Integer.compare(arg0.getStart(), arg1.getStart());
			}
		});

		if (debug) {
			System.out.println("annotations for " + xmlFile);
			System.out.println(" segment grouped and ordered by start position: " + annotationsToAddByStartPos);
			System.out.println(" segment grouped and ordered by end position: " + annotationsToAddByEndPos);
			System.out.println(" token ordered by start position: " + tokenAnnotations);
		}
	}
140

  
141
	boolean mustChangeAnaValue = false;
142
	boolean inAna = false;
143
	private boolean inForm;
144
	private boolean mustChangeWordValue;
145

  
146
	protected void processStartElement() throws XMLStreamException, IOException {
147
		
148
		currentStartAnnotations = annotationsToAddByStartPos.get(position_counter);
149
		currentEndAnnotations = annotationsToAddByEndPos.get(position_counter);
150
		//System.out.println("A Starts: "+currentStartAnnotations+" "+currentEndAnnotations);
151
		// if (debug)
152
		// System.out.println("----- "+localname+"@"+position_counter+" START ANNOT = "+currentStartAnnotations);
153
		if ("teiHeader".equals(localname)) {
154
			super.processStartElement();
155
			goToEnd("teiHeader"); // skip teiHeader // will be closed in
156
									// processEndElement()
157
		} else if ("w".equals(localname)) {
158
			// System.out.println("W START pos="+position_counter+" annots="+currentStartAnnotations);
159
			if (currentStartAnnotations != null) {// there are still annotations
160
													// to write, possible
161
													// several for a position
162
				// if (debug)
163
				// System.out.println("WRITING START at "+position_counter+": "+currentStartAnnotations);
164
				for (Annotation a : currentStartAnnotations) {
165
					if (!"#del".equals(a.getValue())) {
166
						if (debug)
167
							System.out.println(" force write start annotation " + a);
168
						writeStartAnnotation(a);
169
					} else {
170
						if (debug)
171
							System.out.println(" no need to write start annotation " + a);
172
						currentEndAnnotations = annotationsToAddByEndPos.get(a.getEnd());
173
						// System.out.println(" shall we need to change annotation attribute : "+a.getValue()+
174
						// " | currentEndAnnotations : "+currentEndAnnotations);
175
						if (currentEndAnnotations == null) {
176
							System.out.println("WARNING ERROR null pointer for end position of annotation " + a);
177
						} else {
178
							currentEndAnnotations.remove(a);
179
						}
180
					}
181
				}
182
			}
183
			inW = true;
184

  
185
			// get token annotation if any
186
			currentTokenAnnotations.clear(); // current word annotations to write, may be empty
187
			Annotation a = null;
188
			
189
			if (tokenAnnotations.size() > 0) {
190
				a = tokenAnnotations.get(0);
191
				//System.out.println("for p="+position_counter+" next token annot="+a);
192
				
193
				if (a.getStart() <= position_counter && position_counter <= a.getEnd()) {
194
					// write the annotation
195
					currentTokenAnnotations.put(a.getType(), a);
196
					if (a.getEnd() == position_counter) // its the last word that needs to write this annotation
197
						tokenAnnotations.remove(0);
198
				}
199
			}
200

  
201
			super.processStartElement(); // write the tag
202
		
203
		} else if ("form".equals(localname) && inW) {
204
			if (currentTokenAnnotations.containsKey("word")) {
205
				Annotation a = currentTokenAnnotations.get("word");
206
				//System.out.println("Updating token annotation with: " + a);
207

  
208
				String value = a.getValue();
209
				if ("#del".equals(value))
210
					value = "";
211
				if (value == null)
212
					value = "ERROR";
213

  
214
				try {
215
					writer.writeStartElement("form");
216
					writer.writeCharacters(value);
217
					mustChangeWordValue = true;
218
				} catch (XMLStreamException e) {
219
					// TODO Auto-generated catch block
220
					e.printStackTrace();
221
				}
222

  
223
				currentTokenAnnotations.remove("word"); // annotation updated
224
			} else {
225
				super.processStartElement();
226
			}
227
		} else if ("ana".equals(localname) && inW) {
228
			inAna = true;
229
			String type = parser.getAttributeValue(null, "type").substring(1);
230
			if (currentTokenAnnotations.containsKey(type)) {
231
				Annotation a = currentTokenAnnotations.get(type);
232
				//System.out.println("Updating token annotation with: " + a);
233

  
234
				String value = a.getValue();
235
				if ("#del".equals(value))
236
					value = "";
237
				if (value == null)
238
					value = "ERROR";
239

  
240
				try {
241
					writer.writeStartElement("ana");
242
					writer.writeAttribute("type", "#" + type);
243
					writer.writeAttribute("resp", "#" + a.getAnnotator()); // change
244
																			// resp
245
					writer.writeCharacters(value);
246
					mustChangeAnaValue = true;
247
				} catch (XMLStreamException e) {
248
					// TODO Auto-generated catch block
249
					e.printStackTrace();
250
				}
251

  
252
				currentTokenAnnotations.remove(type); // annotation updated
253
			} else {
254
				super.processStartElement();
255
			}
256
		} else if (currentStartAnnotations != null) { // is it an annotations ?
257
			// System.out.println("A START="+localname);
258
			boolean toDelete = false;
259
			for (int i = 0; i < currentStartAnnotations.size(); i++) {
260
				Annotation a = currentStartAnnotations.get(i);
261
				// if (debug)
262
				// System.out.println("=============== Start annot : "+a);
263
				if (a.getType().equals(localname)) { // force delete, will be
264
														// written in the "w"
265
														// case
266
					int start = Integer.parseInt(parser.getAttributeValue(null, "start"));
267
					int end = Integer.parseInt(parser.getAttributeValue(null, "end"));
268
					if (a.getStart() == start && a.getEnd() == end) {
269
						// updated = true;
270
						// if (debug)
271
						// System.out.println(" found existing annotation "+a);
272
						if ("#del".equals(a.getValue())) {
273
							toDelete = true;
274
							// currentEndAnnotations.remove(a); // MAYBE NOT TO
275
							// DO THIS HERE ?
276
							if (debug)
277
								System.out.println(" force delete start annotation " + a);
278
						} else { // update existing annotation, no need to store
279
									// the end of annotation
280
							writeStartAnnotation(a);
281
							toDelete = true;
282
							if (debug)
283
								System.out.println(" update annotation " + a);
284
							currentEndAnnotations = annotationsToAddByEndPos.get(a.getEnd());// EL
285
																								// NO
286
																								// CAPITO
287
																								// YET
288
																								// !!
289
							// System.out.println(" shall we need to change annotation attribute : "+a.getValue()+
290
							// " | currentEndAnnotations : "+currentEndAnnotations);
291
							if (currentEndAnnotations == null) {
292
								System.out.println("WARNING ERROR null pointer for end position of annotation " + a);
293
							} else {
294
								currentEndAnnotations.remove(a);
295
							}
296
						}
297

  
298
						currentStartAnnotations.remove(i);
299
						i--;
300
					}
301

  
302
					break; // no need to continue
303
				} else {
304
					// System.out.println("------- with same start pos");
305
					String s_end = parser.getAttributeValue(null, "end");
306
					String s_start = parser.getAttributeValue(null, "start");
307
					String s_author = parser.getAttributeValue(null, "author");
308
					String s_ref = parser.getAttributeValue(null, "ref");
309
					if (s_start != null && s_end != null && s_author != null && s_ref != null) {
310
						int end = Integer.parseInt(s_end);
311
						if (a.getEnd() >= end) { // must write a
312
							if (!"#del".equals(a.getValue())) {
313
								if (debug)
314
									System.out.println(" writing of start annotation " + a);
315
								writeStartAnnotation(a);
316

  
317
							} else {
318
								toDelete = true;
319
								if (debug)
320
									System.out.println(" no writing of start annotation " + a);
321
								currentEndAnnotations = annotationsToAddByEndPos.get(a.getEnd());
322
								// System.out.println(" shall we need to change annotation attribute : "+a.getValue()+
323
								// " | currentEndAnnotations : "+currentEndAnnotations);
324
								if (currentEndAnnotations == null) {
325
									System.out.println("WARNING ERROR null pointer for end position of annotation " + a);
326
								} else {
327
									currentEndAnnotations.remove(a);
328
								}
329
							}
330
							currentStartAnnotations.remove(i);
331
							i--;
332
							// break;
333
						}
334
					}
335
				}
336
			}
337
			if (!toDelete) { // the structure or annotation was not modified
338
				super.processStartElement();
339
			}
340

  
341
		} else {
342
			// System.out.println("X START="+localname);
343
			super.processStartElement();
344
		}
345
	}
346

  
347
	@Override
348
	public void processCharacters() throws XMLStreamException {
349
		//System.out.println("processCharaters inAna="+inAna+" mustChangeAnaValue="+mustChangeAnaValue);
350
		if (inAna && mustChangeAnaValue) {
351
			// nothing content is already written
352
			//System.out.println("skip ana value because we replace it");
353
		} else if (inW && mustChangeWordValue) {
354
			// nothing content is already written
355
			//System.out.println("skip form value because we replace it");
356
		} else {
357
			super.processCharacters();
358
		}
359
	}
360

  
361
	protected void processEndElement() throws XMLStreamException {
362
		// currentEndAnnotations =
363
		// annotationsToAddByEndPos.get(position_counter); // annotation to end
364
		previousEndAnnotations = annotationsToAddByEndPos.get(position_counter - 1); // existing
365
																						// (or
366
																						// not)
367
																						// annotation
368
																						// that
369
																						// have
370
																						// already
371
																						// been
372
																						// closed
373
		// if (debug)
374
		// System.out.println("----- "+localname+"@"+position_counter+" END ANNOT = "+currentEndAnnotations);
375

  
376
		if ("w".equals(localname)) {
377
			// System.out.println("W END");
378

  
379
			for (String type : currentTokenAnnotations.keySet()) {
380
				try {
381
					writer.writeStartElement("ana");
382
					writer.writeAttribute("type", "#" + type);
383
					writer.writeAttribute("resp", "#" + currentTokenAnnotations.get(type).getAnnotator()); // change
384
																											// resp
385
					writer.writeCharacters(currentTokenAnnotations.get(type).getValue());
386
					writer.writeEndElement();
387
				} catch (XMLStreamException e) {
388
					e.printStackTrace();
389
				}
390
			}
391

  
392
			super.processEndElement(); // write word then close annotations
393
			inW = false;
394
			// force write All known annotation after the word
395
			if (currentEndAnnotations != null) {// there are still annotations
396
												// to write, possible several
397
												// for a position
398
				// if (debug)
399
				// System.out.println("WRITING END at "+position_counter+": "+currentEndAnnotations);
400
				for (Annotation a : currentEndAnnotations) {
401
					// if (debug)
402
					// System.out.println("=============== End annot : "+a);
403
					if (!"#del".equals(a.getValue())) {
404
						if (debug)
405
							System.out.println(" force write end annotation " + a);
406
						writeEndAnnotation(a);
407
					}/*
408
					 * else { if (debug)
409
					 * System.out.println(" no need to write end annotation "
410
					 * +a); }
411
					 */
412

  
413
				}
414
			}
415

  
416
			position_counter++;
417
		} else if ("form".equals(localname) && inW) {
418
			//if (!mustChangeAnaValue)
419
				super.processEndElement();
420
				mustChangeWordValue = false;
421
			inForm = false;
422
		} else if ("ana".equals(localname) && inW) {
423
			//if (!mustChangeAnaValue)
424
				super.processEndElement();
425
			inAna = false;
426
			mustChangeAnaValue = false;
427
		} else if (previousEndAnnotations != null) { // force delete annotations
428
														// previously written in
429
														// the "w" case
430
			// System.out.println("previousEndAnnotations !!!!! "+previousEndAnnotations);
431
			boolean toDelete = false;
432
			for (int i = 0; i < previousEndAnnotations.size(); i++) {
433
				Annotation a = previousEndAnnotations.get(i);
434
				if (a.getType().equals(localname)) { // update the annotation
435
					// if (debug)
436
					// System.out.println(" found existing end annotation "+a);
437

  
438
					if ("#del".equals(a.getValue())) { // if
439
														// (!"#del".equals(a.getValue()))
440
														// {
441
						toDelete = true;
442
						if (debug)
443
							System.out.println(" force delete end annotation " + a);
444
					}
445

  
446
					previousEndAnnotations.remove(i);
447
					i--;
448
					break; // no need to continue
449
				}
450
			}
451
			if (!toDelete) { // the structure or annotation was not modified
452
				super.processEndElement();
453
			} else {
454
				// if (debug)
455
				// System.out.println("DELETING END at "+position_counter+" localname="+localname+": "+previousEndAnnotations);
456
			}
457
		} else {
458
			// System.out.println("X END="+localname);
459
			super.processEndElement();
460
		}
461
	}
462

  
463
	private void writeStartAnnotation(Annotation a) {
464
		try {
465
			writer.writeStartElement("txm:" + a.getType());
466
			writer.writeAttribute("author", "" + a.getAnnotator());
467
			writer.writeAttribute("ref", a.getValue());
468
			writer.writeAttribute("date", BaseParameters.dateformat.format(new Date()));
469
			writer.writeAttribute("start", Integer.toString(a.getStart()));
470
			writer.writeAttribute("end", Integer.toString(a.getEnd()));
471
		} catch (XMLStreamException e) {
472
			// TODO Auto-generated catch block
473
			e.printStackTrace();
474
		}
475
	}
476

  
477
	private void writeEndAnnotation(Annotation a) {
478
		try {
479
			writer.writeEndElement();
480
		} catch (XMLStreamException e) {
481
			// TODO Auto-generated catch block
482
			e.printStackTrace();
483
		}
484
	}
485
	
486
//////////// TESTS ///////////
487
	
488
	public static ArrayList<Annotation>  testAdding(){
489

  
490
		Annotation a1 = new Annotation("Actr", "Actr100", 15, 16);
491
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
492
		annotations.add(a1);
493
		return annotations;
494
	}
495

  
496
	public static ArrayList<Annotation>  testAddingInclusive(){
497

  
498
		Annotation a1 = new Annotation("CoAc", "CoAc2093", 15, 19);
499
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
500
		annotations.add(a1);
501
		return annotations;
502
	}
503

  
504
	public static ArrayList<Annotation>  testAddingInclusive2(){
505

  
506
		Annotation a1 = new Annotation("Actr", "Actr100", 15, 16);
507
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
508
		annotations.add(a1);
509
		return annotations;
510
	}
511

  
512
	public static ArrayList<Annotation>  testUpdating(){
513

  
514
		Annotation a1 = new Annotation("Actr", "Actr200", 15, 16);
515
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
516
		annotations.add(a1);
517
		return annotations;
518
	}
519

  
520
	public static ArrayList<Annotation>  testUpdatingDeleting(){	
521
		Annotation a1 = new Annotation("Actr", "Actr200", 15, 16);
522
		Annotation a2 = new Annotation("CoAc", "#del", 15, 19);		
523
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
524
		annotations.add(a1);
525
		annotations.add(a2);
526
		return annotations;
527
	}
528

  
529
	public static ArrayList<Annotation>  testAddDeleting2(){	
530
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
531
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 19); //ADD
532
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
533
		annotations.add(a1);
534
		annotations.add(a2);
535
		return annotations;
536
	}
537

  
538
	public static ArrayList<Annotation>  testUpdatingDeletingAdding(){	
539
		Annotation a1 = new Annotation("Actr", "Actr200", 15, 16); //UPDATE
540
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 19); //ADD	
541
		Annotation a3 = new Annotation("CoAc", "#del", 15, 19);	//DELETE	
542
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
543
		annotations.add(a1);
544
		annotations.add(a2);
545
		annotations.add(a3);
546
		return annotations;
547
	}
548

  
549
	public static ArrayList<Annotation>  testUpdatingDeletingAdding2(){	// NOT WORKING
550
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
551
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 19); //ADD  same place as CoAc
552
		Annotation a3 = new Annotation("CoAc", "CoAc2091", 15, 19); //UPDATE		
553
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
554
		annotations.add(a1);
555
		annotations.add(a2);
556
		annotations.add(a3);
557
		return annotations;
558
	}
559

  
560
	public static ArrayList<Annotation>  testUpdatingDeletingAdding3(){	// NOT WORKING
561
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
562
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 16); //ADD same place as Actr		
563
		Annotation a3 = new Annotation("CoAc", "CoAc2091", 15, 19); //UPDATE		
564
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
565
		annotations.add(a1);
566
		annotations.add(a2);
567
		annotations.add(a3);
568
		return annotations;
569
	}
570

  
571
	public static ArrayList<Annotation>  testUpdatingDeletingAdding4(){	
572
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
573
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 13, 20); //ADD Outside Actr and CoAc		
574
		Annotation a3 = new Annotation("CoAc", "CoAc2091", 15, 19); //UPDATE		
575
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
576
		annotations.add(a1);
577
		annotations.add(a2);
578
		annotations.add(a3);
579
		return annotations;
580
	}
581

  
582
	public static ArrayList<Annotation>  testUpdatingDeletingAdding5(){	
583
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
584
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 16); //ADD		
585
		Annotation a3 = new Annotation("CoAc", "#del", 15, 19); //DELETE		
586
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
587
		annotations.add(a1);
588
		annotations.add(a2);
589
		annotations.add(a3);
590
		return annotations;
591
	}
592

  
593
	public static ArrayList<Annotation>  testUpdatingDeletingAdding6(){	 
594
		Annotation a1 = new Annotation("Actr", "#del", 15, 16); //DELETE
595
		Annotation a2 = new Annotation("NaPl", "NaPl14554", 15, 16); //ADD		
596
		Annotation a3 = new Annotation("CoAc", "#del", 15, 19); //DELETE		
597
		Annotation a4 = new Annotation("CoAc", "CoAc1", 15, 18); //DELETE
598
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
599
		annotations.add(a1);
600
		annotations.add(a2);
601
		annotations.add(a3);
602
		annotations.add(a4);
603
		return annotations;
604
	}
605

  
606
	public static ArrayList<Annotation>  testUpdatingInclusive(){	
607
		Annotation a1 = new Annotation("Actr", "Actr200", 15, 16);
608
		Annotation a2 = new Annotation("CoAc", "CoAc321", 15, 19);		
609
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
610
		annotations.add(a1);
611
		annotations.add(a2);
612
		return annotations;
613
	}
614

  
615
	public static ArrayList<Annotation>  testDeleting(){
616

  
617
		Annotation a1 = new Annotation("Actr", "#del", 15, 16);
618
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
619
		annotations.add(a1);
620
		return annotations;
621
	}
622

  
623
	public static ArrayList<Annotation>  testDeletingInclusive(){
624

  
625
		Annotation a1 = new Annotation("Actr", "#del", 15, 16);
626
		Annotation a2 = new Annotation("CoAc", "#del", 15, 16);		
627
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
628
		annotations.add(a1);
629
		annotations.add(a2);
630
		return annotations;
631
	}
632

  
633
	static int ADD = 1;
634
	static int UPDATE = 2;
635
	static int DELETE = 3;
636
	static int ADD2 = 4;
637
	static int UPDATE2 = 5;
638
	static int DELETE2 = 6;
639
	static int DELETE3 = 7;
640
	static int DELETE4 = 8;
641
	static int ADD3 = 9;
642
	static int UPDATE3 = 10;
643
	static int UPDATEDELETE = 11;
644
	static int UPDATEDELETEADD = 12;
645
	static int UPDATEDELETEADD2 = 13;
646
	static int UPDATEDELETEADD3 = 14;
647
	static int UPDATEDELETEADD4 = 15;
648
	static int UPDATEDELETEADD5 = 16;
649
	static int UPDATEDELETEADD6 = 17;
650
	static int ADDDELETE2 = 18;
651

  
652
	public static void main2(String args[]) throws IOException, XMLStreamException {
653
		File xmlFile = null;
654
		int corpus_start_position = 10;
655

  
656
		ArrayList<Annotation> annotations = null;
657
		int test = UPDATEDELETEADD3;//AnnotationWriter.UPDATE;// AnnotationWriter.ADD;
658
		switch (test) {
659
		case 1 : 
660
			//AnnotationWriter.ADD
661
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST.xml");
662
			annotations = testAdding(); /// OK
663
			break;
664
		case 2 : 
665
			//AnnotationWriter.UPDATE
666
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST2.xml");
667
			annotations = testUpdating(); /// OK
668
			break;
669
		case 3 : 
670
			//AnnotationWriter.DELETE
671
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST4.xml");
672
			annotations = testDeleting(); /// OK
673
			break;
674
		case 4 : 
675
			//AnnotationWriter.ADD2
676
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST3.xml");
677
			annotations = testAddingInclusive(); /// OK
678
			break;
679
		case 5 : 
680
			//AnnotationWriter.UPDATE2
681
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
682
			annotations = testUpdating(); /// OK
683
			break;
684
		case 6 : 
685
			//AnnotationWriter.DELETE2
686
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST6.xml");
687
			annotations = testDeleting();
688
			break;
689
		case 7 : 
690
			//AnnotationWriter.DELETE3
691
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST6.xml");
692
			annotations = testDeletingInclusive();
693
			break;
694
		case 8 : 
695
			//AnnotationWriter.DELETE4
696
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST7.xml");
697
			annotations = testDeletingInclusive();
698
			break;
699
		case 9 : 
700
			//AnnotationWriter.ADD3
701
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST3bis.xml");
702
			annotations = testAddingInclusive2(); /// OK
703
			break;
704
		case 10 : 
705
			//AnnotationWriter.UPDATE3
706
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
707
			annotations = testUpdatingInclusive(); /// OK
708
			break;
709
		case 11 : 
710
			//AnnotationWriter.UPDATEDELETE
711
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
712
			annotations = testUpdatingDeleting(); /// OK
713
			break;
714
		case 12 : 
715
			//AnnotationWriter.UPDATEDELETEADD
716
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
717
			annotations = testUpdatingDeletingAdding(); /// NOT OK
718
			break;
719
		case 13 : 
720
			//AnnotationWriter.UPDATEDELETEADD2
721
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
722
			annotations = testUpdatingDeletingAdding2(); /// OK
723
			break;
724
		case 14 : 
725
			//AnnotationWriter.UPDATEDELETEADD3
726
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
727
			annotations = testUpdatingDeletingAdding3(); /// OK
728
			break;
729
		case 15 : 
730
			//AnnotationWriter.UPDATEDELETEADD4 -- add 1 annotation over the 2 others
731
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
732
			annotations = testUpdatingDeletingAdding4(); /// OK
733
			break;
734
		case 16 : 
735
			//AnnotationWriter.UPDATEDELETEADD5 -- delete 2 annotations, add 1
736
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
737
			annotations = testUpdatingDeletingAdding5(); /// OK
738
			break;
739
		case 17 : 
740
			//AnnotationWriter.UPDATEDELETEADD6 -- delete 2 annotations, add 2
741
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
742
			annotations = testUpdatingDeletingAdding6(); /// NOT OK
743
			break;
744
		case 18 : 
745
			//AnnotationWriter.ADDDELETE -- delete 1, add 1
746
			xmlFile = new File(System.getProperty("user.home"), "TXM/corpora/TEST/TEST5.xml");
747
			annotations = testAddDeleting2(); /// OK 
748
			break;
749
			//
750
		default: break;
751
		}
752

  
753
		// no token annotations
754
		AnnotationInjector annotationInjector = new AnnotationInjector(xmlFile, annotations, new ArrayList<Annotation>(), corpus_start_position, true);
755
		File outfile = new File(xmlFile.getParentFile(), "result-"+xmlFile.getName());
756
		if (annotationInjector.process(outfile)) {
757
			if (!ValidateXml.test(outfile)) {
758
				System.out.println("FAIL");
759
			} else {
760
				System.out.println("SUCCESS ??");
761
			}	
762
		}
763

  
764
	}
765

  
766
	public static void main(String[] args) {
767
		try {
768
		File xmlFile = new File("/home/mdecorde/TXM/corpora/TDM80J/txm/TDM80J/tdm80j.xml");
769
		File outfile = new File("/home/mdecorde/TXM/corpora/TDM80J/txm/TDM80J/tdm80j-annot.xml");
770

  
771
		ArrayList<Annotation> segmentAnnotations = new ArrayList<Annotation>();
772
		int starts[] = { 5, 50, 104, 185, 235, 434, 517, 784, 949 };
773
		int ends[] = { 6, 51, 105, 186, 236, 435, 518, 785, 950 };
774
		for (int i = 0; i < starts.length; i++) {
775
			segmentAnnotations.add(new Annotation("Actr", "Philou", starts[i], ends[i]));
776
		}
777

  
778
		ArrayList<Annotation> tokenAnnotations = new ArrayList<Annotation>();
779
		int positions[] = { 1, 32, 79, 224, 358, 428, 889, 1247, 1253, 1346 };
780
		for (int p : positions) tokenAnnotations.add(new Annotation("frlemma", "de", p, p+5));
781
		
782
		AnnotationInjector ai;
783
		
784
			ai = new AnnotationInjector(xmlFile, segmentAnnotations, tokenAnnotations, 0, true);
785
			System.out.println("start processing...");
786
			ai.process(outfile);
787
			System.out.println("Done.");
788
		} catch (Exception e) {
789
			e.printStackTrace();
790
		}
791
	}
792
}
0 793

  
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/CQPAnnotationManager.java (revision 850)
1
package org.txm.annotation.kr.core;
2

  
3
import java.io.IOException;
4
import java.util.ArrayList;
5
import java.util.HashMap;
6
import java.util.List;
7

  
8
import org.txm.Toolbox;
9
import org.txm.annotation.kr.core.repository.AnnotationEffect;
10
import org.txm.annotation.kr.core.repository.AnnotationType;
11
import org.txm.annotation.kr.core.repository.KnowledgeRepository;
12
import org.txm.searchengine.cqp.AbstractCqiClient;
13
import org.txm.searchengine.cqp.CQPSearchEngine;
14
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
15
import org.txm.searchengine.cqp.corpus.MainCorpus;
16
import org.txm.searchengine.cqp.corpus.Property;
17
import org.txm.searchengine.cqp.corpus.StructuralUnit;
18
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty;
19
import org.txm.searchengine.cqp.corpus.query.Match;
20
import org.txm.searchengine.cqp.serverException.CqiServerError;
21
import org.txm.utils.logger.Log;
22

  
23
public class CQPAnnotationManager {
24
	MainCorpus corpus;
25
	
26
	public static final String REF= "ref";
27

  
28
	public CQPAnnotationManager(MainCorpus corpus) {
29
		this.corpus = corpus;
30
	}
31

  
32
	/**
33
	 * returns all annotations for a given type
34
	 * 
35
	 * @return
36
	 * @throws Exception
37
	 */
38
	public List<Annotation> getAnnotations(AnnotationType type) throws Exception {
39
		return getAnnotations(type, false);
40
	}
41

  
42
	/**
43
	 * returns all annotations for a given type and  overlap option
44
	 * 
45
	 * @return
46
	 * @throws Exception
47
	 */
48
	public List<Annotation> getAnnotations(AnnotationType type, boolean overlap) throws Exception {
49
		return getAnnotations(type, null, null, overlap);
50
	}
51

  
52
	public List<Annotation> getAnnotations(AnnotationType type, int start, int end) throws CqiClientException, IOException, CqiServerError {
53
		return getAnnotations(type, new Match(start, end), null, false);
54
	}
55

  
56
	public List<Annotation> getAnnotations(AnnotationType type, int start, int end, boolean overlap) throws CqiClientException, IOException, CqiServerError {
57
		return getAnnotations(type, new Match(start, end), null, overlap);
58
	}
59

  
60
	/**
61
	 * Return all annotations for ONE match
62
	 * can be use to find out overlap
63
	 * 
64
	 * @param refType can be null
65
	 * @param match can be null
66
	 * @param value can be null
67
	 * @param overlap
68
	 * @return
69
	 * @throws CqiClientException 
70
	 * @throws CqiServerError 
71
	 * @throws IOException 
72
	 */
73
	public List<Annotation> getAnnotations(AnnotationType type, Match match, String value, boolean overlap) throws CqiClientException, IOException, CqiServerError {
74
		HashMap<StructuralUnitProperty, AnnotationType> supList = new HashMap<StructuralUnitProperty, AnnotationType>();
75

  
76
		//TODO add a corpus parameters for structures that code annotations
77
		//TODO manage word properties annotations
78
		if (type == null) {
79
			
80
			// first get know annotations from corpus's KR
81
			for (String kr_name : KRAnnotationEngine.getKnowledgeRepositoryNames(corpus)) {
82
				KnowledgeRepository kr = KRAnnotationEngine.getKnowledgeRepository(corpus, kr_name);
83
				if (kr == null) continue;
84
				List<AnnotationType> types = kr.getAllAnnotationTypes();
85
				for (AnnotationType t : types) {
86
					StructuralUnit su = corpus.getStructuralUnit(t.getId().toLowerCase());
87
					if (su != null) {
88
						StructuralUnitProperty sup = su.getProperty(REF);
89
						if (sup != null) {
90
							supList.put(sup, t);
91
						}
92
					}
93
				}
94
			}
95
			
96

  
97
			for (StructuralUnit su : corpus.getStructuralUnits()) {
98
				if (su.getName().equals("text") || su.getName().equals("txmcorpus") || 
99
						su.getName().equals("lb") || su.getName().equals("pb")) continue; // that's no annotation for sure ;)
100

  
101
				StructuralUnitProperty sup = su.getProperty(REF);
102
				if (sup == null) sup = su.getProperty("n");
103
				if (sup != null) {
104
					if (!supList.containsKey(sup)) // don't override a know annotation
105
						supList.put(sup, null);
106
				}
107
			}
108
			
109
		} else {
110
			StructuralUnit su = corpus.getStructuralUnit(type.getId().toLowerCase());
111
			if (su == null) return new ArrayList<Annotation>(); // no property, no annotation :)
112

  
113
			StructuralUnitProperty sup = su.getProperty(REF);
114
			if (sup == null) return new ArrayList<Annotation>(); // no attribute REF, no annotation :)
115

  
116
			supList.put(sup, type);
117
		}
118

  
119
		ArrayList<Annotation> result = new ArrayList<Annotation>();
120
		for (StructuralUnitProperty sup : supList.keySet()) {
121
			String sup_name = sup.getStructuralUnit().getName();
122
			AnnotationType sup_type = supList.get(sup);
123
			if (sup_type != null) sup_name = sup_type.getId();
124
			
125
			int Nstruct = CQPSearchEngine.getCqiClient().attributeSize(sup.getQualifiedName());
126

  
127
			//TODO: can optimize this, to fetch only structure that matches
128
			int[] iStructs = new int[Nstruct];
129
			for (int iStruct = 0; iStruct < Nstruct; iStruct++) iStructs[iStruct] = iStruct;
130
			String svalues[] = CQPSearchEngine.getCqiClient().struc2Str(sup.getQualifiedName(), iStructs); // one position
131

  
132
			for (int iStruct = 0; iStruct < Nstruct; iStruct++) {
133
				int smatch[] = CQPSearchEngine.getCqiClient().struc2Cpos(sup.getQualifiedName(), iStruct);
134

  
135
				if (value != null) { // supplementary test if value is specified
136
					if (!value.equals(svalues[iStruct])) {
137
						continue; // next struct
138
					}
139
				}
140
				if (match != null) {
141
					if (overlap) {
142
						if (match.getEnd() <  smatch[0]) {
143
							break; // no need to continue, structures are ordered
144
						} else if (smatch[1] < match.getStart()) {
145
							continue; // next structure
146
						} else {
147
							result.add(new Annotation(sup_name, svalues[iStruct], smatch[0], smatch[1]));
148
						}
149
					} else {
150
						if (match.getStart() == smatch[0] && match.getEnd() == smatch[1]) {
151
							result.add(new Annotation(sup_name, svalues[iStruct], smatch[0], smatch[1]));
152
						}
153
					}
154
				} else { // get all annotations
155
					result.add(new Annotation(sup_name, svalues[iStruct], smatch[0], smatch[1]));
156
				}
157
			}
158
		}
159
		return result;
160
	}
161

  
162
	/**
163
	 * 
164
	 * @param matches Ordered matches, not null
165
	 * @param type not null
166
	 * @param overlap use true to detect overlapping annotations
167
	 * @return ONE Annotation per Match for a given type
168
	 * 
169
	 * @throws CqiServerError 
170
	 * @throws IOException 
171
	 * @throws CqiClientException 
172
	 */
173
	public List<Annotation> getAnnotationsForMatches(List<Match> matches, AnnotationType type, boolean overlap) throws IOException, CqiServerError, CqiClientException {
174
		if (type.getEffect().equals(AnnotationEffect.SEGMENT)) {
175
			return getSegmentAnnotationsForMatches(matches, type, overlap);
176
		} else {
177
			return getTokenAnnotationsForMatches(matches, type, overlap);
178
		}
179
	}
180
	
181
	/**
182
	 * 
183
	 * @param matches Ordered matches, not null
184
	 * @param type not null
185
	 * @param overlap use true to detect overlapping annotations
186
	 * @return ONE Annotation per Match for a given type
187
	 * 
188
	 * @throws CqiServerError 
189
	 * @throws IOException 
190
	 * @throws CqiClientException 
191
	 */
192
	public List<Annotation> getTokenAnnotationsForMatches(List<Match> matches, AnnotationType type, boolean overlap) throws IOException, CqiServerError, CqiClientException {
193

  
194
		Property prop = corpus.getProperty(type.getId().toLowerCase());
195
		if (prop == null) return nullAnnotationList(matches.size()); // no property, no annotation :)
196

  
197
		AbstractCqiClient cqi = CQPSearchEngine.getCqiClient();
198
		
199
		int positions[] = new int[matches.size()];
200
		int i = 0;
201
		for (Match m : matches) positions[i++] = m.getStart();
202
		
203
		String[] strs = cqi.cpos2Str(prop.getQualifiedName(), positions);
204
		
205
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
206
		i = 0;
207
		for (String str : strs) {
208
			int p = positions[i++];
209
			annotations.add(new Annotation(type.getId(), str, p, p));
210
		}
211
		Log.info("TOKEN ANNOTATION CQP VALUES: "+annotations);
212
		return annotations;
213
	}
214
	
215
	/**
216
	 * 
217
	 * @param matches Ordered matches, not null
218
	 * @param type not null
219
	 * @param overlap use true to detect overlapping annotations
220
	 * @return ONE Annotation per Match for a given type
221
	 * 
222
	 * @throws CqiServerError 
223
	 * @throws IOException 
224
	 * @throws CqiClientException 
225
	 */
226
	public List<Annotation> getSegmentAnnotationsForMatches(List<Match> matches, AnnotationType type, boolean overlap) throws IOException, CqiServerError, CqiClientException {
227

  
228
		StructuralUnit su = corpus.getStructuralUnit(type.getId().toLowerCase());
229
		if (su == null) return nullAnnotationList(matches.size()); // no property, no annotation :)
230

  
231
		StructuralUnitProperty sup = su.getProperty(REF);
232
		if (sup == null) return nullAnnotationList(matches.size()); // no attribute REF, no annotation :)
233

  
234
		int Nstruct = CQPSearchEngine.getCqiClient().attributeSize(sup.getQualifiedName());
235
		int iMatch = 0;
236
		int iStruct = 0;
237
		int Nmatches = matches.size();
238
		ArrayList<Integer> values = new ArrayList<Integer>(); // contains structure numbers
239
		ArrayList<int[]> annotationsStartEnd = new ArrayList<int[]>(); // contains structure numbers
240

  
241
		if (Nstruct == 0) return new ArrayList<Annotation>(matches.size()); // no annotation
242
		//System.out.println("NUMBER OF STRUCT "+sup+" "+Nstruct);
243
		//System.out.println("MATCHES "+matches);
244
		int smatch[] = CQPSearchEngine.getCqiClient().struc2Cpos(sup.getQualifiedName(), iStruct);
245
		//System.out.println(" TEST WITH "+smatch[0]+"-"+smatch[1]);
246
		if (overlap) {
247
			for (iMatch = 0 ; iStruct < Nstruct && iMatch < Nmatches; ) {
248
				Match m = matches.get(iMatch);
249

  
250
				if(smatch[1] < m.getStart()) { // next struct
251
					iStruct++;
252
					if (iStruct < Nstruct)
253
						smatch = CQPSearchEngine.getCqiClient().struc2Cpos(sup.getQualifiedName(), iStruct);
254
					//System.out.println(" NEXT STRUCT TEST WITH "+smatch[0]+"-"+smatch[1]);
255
				} else if (m.getEnd() < smatch[0]) { // next match
256
					values.add(null);
257
					annotationsStartEnd.add(null);
258
					iMatch++;
259
					//System.out.println(" NEXT MATCH of ["+m.getStart()+"-"+m.getEnd()+"] match TEST WITH "+smatch[0]+"-"+smatch[1]);
260
				} else {
261
					//					((m.getStart() <= smatch[0] && smatch[0] <= m.getEnd()) ||
262
					//								(m.getStart() <= smatch[1] && smatch[1] <= m.getEnd()) ||
263
					//						(smatch[0] <= m.getStart() && m.getEnd() <= smatch[1])) {
264
					iMatch++;
265
					values.add(iStruct);
266
					annotationsStartEnd.add(smatch);
267
					//System.out.println(" FOUND annotation "+smatch[0]+"-"+smatch[1]);
268
				}
269
			}
270
		} else { // strict matches
271
			for (iMatch = 0 ; iStruct < Nstruct && iMatch < Nmatches; ) {
272
				Match m = matches.get(iMatch);
273
				if (smatch[0] == m.getStart() && smatch[1] == m.getEnd()) {
274
					values.add(iStruct);
275
					annotationsStartEnd.add(smatch);
276
					iMatch++;
277
					iStruct++;
278
					if (iStruct < Nstruct)
279
						smatch = CQPSearchEngine.getCqiClient().struc2Cpos(sup.getQualifiedName(), iStruct);
280
				} else if(smatch[1] < m.getStart()) { // next struct
281
					iStruct++;
282
					if (iStruct < Nstruct)
283
						smatch = CQPSearchEngine.getCqiClient().struc2Cpos(sup.getQualifiedName(), iStruct);
284
				} else if (m.getEnd() < smatch[0]) { // next match
285
					values.add(null);
286
					annotationsStartEnd.add(null);
287
					iMatch++;
288
				} 
289
			}
290
		}
291

  
292
		while (values.size() < matches.size()) {
293
			values.add(null); // if matches have not been processed due to no more struct available
294
			annotationsStartEnd.add(null);
295
		}
296

  
297
		int tmp_strucs[] = new int[values.size()]; // create the structure numbers array that CQP.struc2Str needs
298
		for (int i = 0 ; i < values.size() ; i++) {
299
			if (values.get(i) == null ) {
300
				tmp_strucs[i] = 0; // add a fake value, remove after CQP call
301
			} else {
302
				tmp_strucs[i] = values.get(i);
303
			}
304
		}
305

  
306
		String[] svalues = CQPSearchEngine.getCqiClient().struc2Str(sup.getQualifiedName(), tmp_strucs);
307
		ArrayList<Annotation> annotations = new ArrayList<Annotation>();
308
		for (int i = 0 ; i < values.size() ; i++) { // remove the fake values
309
			if (values.get(i) == null) {
310
				annotations.add(null);
311
			} else {
312
				annotations.add(new Annotation(type.getId(), svalues[i], annotationsStartEnd.get(i)[0], annotationsStartEnd.get(i)[1]));
313
			}
314
		}
315

  
316
		Log.info("SEGMENT ANNOTATION CQP VALUES: "+annotations);
317
		return annotations;
318
	}
319

  
320
	private ArrayList<Annotation> nullAnnotationList(int size) {
321
		ArrayList<Annotation> a = new ArrayList<Annotation>(size);
322
		for (int i = 0 ; i < size ; i++) {
323
			a.add(null);
324
		}
325
		return a;
326
	}
327

  
328
	/**
329
	 * @param start
330
	 * @param end
331
	 * @param type
332
	 * @return value for a match and a StructuralUnit
333
	 * @throws IOException
334
	 * @throws CqiServerError
335
	 * @throws CqiClientException 
336
	 */
337
	public String getCQPAnnotationValue(int start, int end, AnnotationType type) throws IOException, CqiServerError, CqiClientException {
338

  
339
		StructuralUnit su = corpus.getStructuralUnit(type.getId().toLowerCase());
340
		if (su == null) return null;
341

  
342
		StructuralUnitProperty sup = su.getProperty(REF);
343
		if (sup == null) return null;
344
		int[] cpos = {start, end};
345
		int[] struc = CQPSearchEngine.getCqiClient().cpos2Struc(sup.getQualifiedName(), cpos);
346

  
347
		if (struc[0] >= 0) {
348
			System.out.println("struct for positions "+start+"-"+end+" -> "+struc[0]+"-"+struc[1]);
349
			if (struc[0] == struc[1]) { // même structure
350
				String[] struc_str = CQPSearchEngine.getCqiClient().struc2Str(sup.getQualifiedName(), struc);
351
				return struc_str[0];
352
			} else { // should not happens
353
				System.out.println("WARNING: FOUND EXISTING ANNOTATION FOR TYPE="+type+" START="+start+" END="+end);
354
				return null;
355
			}
356
		} else {
357
			return null;
358
		}
359
	}
360
	
361
	public void close() {
362
		
363
	}
364
}
0 365

  
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/Annotation.java (revision 850)
1
package org.txm.annotation.kr.core;
2

  
3
import java.io.Serializable;
4
import java.text.DateFormat;
5
import java.text.SimpleDateFormat;
6
import java.util.Date;
7

  
8
import javax.persistence.*;
9

  
10
import org.txm.annotation.kr.core.repository.KnowledgeRepository;
11
import org.txm.objects.BaseParameters;
12
 
13
@Entity
14
public class Annotation implements Serializable {
15

  
16
	private static final long serialVersionUID = -1007684142118207359L;
17

  
18
	//primaryKey corresponds to the start and end positions (in the corpus) and the refType
19
	@EmbeddedId
20
	private AnnotationPK PK;	
21
	
22
	//corresponding to the type and value of the KnowledgeRepository, used to annotate
23
	private String refVal = "";
24
	//user in the project, responsible for the annotation, maybe should be an integer
25
	private String annotatorId = "";
26

  
27
	private String date ;
28
	
29
	public Annotation() {
30
	}
31

  
32
	
33
	public Annotation(String refType, String refVal, int CQPstartpos, int CQPendpos) 	{
34
		this.PK = new AnnotationPK(CQPstartpos, CQPendpos, refType);
35
		this.refVal = refVal;
36
		this.date = BaseParameters.dateformat.format(new Date());
37
		
38
		String s = System.getProperty(KnowledgeRepository.LOGIN_KEY);
39
		if (s != null && s.length() > 0) {
40
			this.annotatorId = s;
41
		} else {
42
			this.annotatorId = System.getProperty("user.name");
43
		}
44
	}
45
	
46
	
47
	public AnnotationPK getPK() {
48
		return this.PK;
49
	}
50
	
51
	public int getStart() {
52
		return this.PK.getStartPosition();
53
	}
54
	
55
	public int getEnd() {
56
		return this.PK.getEndPosition();
57
	}
58
	
59
	public String getType() {
60
		return this.PK.getRefType();
61
	}
62
	
63
	public String getValue() {
64
		return this.refVal;
65
	}
66
	
67
	public String getDate(){
68
		return this.date;
69
	}
70
	
71
	//Maybe will be an integer ?
72
	public String getAnnotator() {
73
		return annotatorId;
74
	}
75
 
76
	
77
	public void setReferentielVal(String newRefVal) {
78
		this.refVal = newRefVal;
79
	}
80
	
81
	
82
	public String toString() {
83
		return getPK().toString() + "=" + getValue();
84
	}
85

  
86
}
0 87

  
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/preferences/AnnotationPreferences.java (revision 850)
1
package org.txm.annotation.kr.core.preferences;
2

  
3

  
4
import org.eclipse.core.runtime.preferences.DefaultScope;
5
import org.osgi.framework.FrameworkUtil;
6
import org.osgi.service.prefs.Preferences;
7
import org.txm.core.preferences.TXMPreferences;
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff