Revision 853

tmp/org.txm.annotation.core/src/org/txm/annotation/core/KRAnnotationEngine.java (revision 853)
1
package org.txm.annotation.core;
2

  
3
import java.util.ArrayList;
4
import java.util.HashMap;
5
import java.util.List;
6

  
7
import org.eclipse.core.runtime.IProgressMonitor;
8
import org.txm.annotation.core.repository.KnowledgeRepository;
9
import org.txm.annotation.core.repository.KnowledgeRepositoryManager;
10
import org.txm.annotation.core.repository.SQLKnowledgeRepository;
11
import org.txm.objects.Base;
12
import org.txm.objects.BaseParameters;
13
import org.txm.searchengine.cqp.corpus.Corpus;
14
import org.txm.searchengine.cqp.corpus.MainCorpus;
15
import org.txm.sql.SQLConnection;
16
import org.txm.utils.logger.Log;
17
import org.w3c.dom.Element;
18
import org.w3c.dom.NodeList;
19

  
20
public class KRAnnotationEngine extends AnnotationEngine {
21

  
22
	static HashMap<MainCorpus, AnnotationManager> ams = new HashMap<MainCorpus, AnnotationManager>();
23
	public static String KNOWLEDGE_ACCESS = "access";
24
	public static String KNOWLEDGE_STRINGS = "strings";
25
	public static String KNOWLEDGE_TYPEQUERIES = "queries";
26
	public static String KNOWLEDGE_TYPES = "fields";
27
	
28
	public static String KNOWLEDGE_TYPEWEBACCESS = "typeaccess";
29

  
30
	public static boolean canBeAnnotated(MainCorpus corpus) {
31
		return getKnowledgeRepositoryNames(corpus).size() > 0;
32
	}
33

  
34
	public static AnnotationManager getAnnotationManager(Corpus corpus) throws Exception{
35
		return getAnnotationManager(corpus.getMainCorpus());
36
	}
37

  
38
	/**
39
	 * get the corpus Annotation Manager and Lazy load if created
40
	 * 
41
	 * @param corpus
42
	 * @return
43
	 * @throws Exception
44
	 */
45
	public static AnnotationManager getAnnotationManager(MainCorpus corpus) throws Exception {
46
		AnnotationManager am = ams.get(corpus);
47
		if (am == null) {
48
			if (getKnowledgeRepositoryNames(corpus).size() > 0) {
49
				am = new AnnotationManager(corpus);
50
				am.initialize();
51
				ams.put(corpus, am);
52
			}
53
				
54
		}
55
		return am;
56
	}
57
	
58
	protected static Element getKnowledgeRepositoriesElement(Element corpusElement) {
59
		NodeList textsList = corpusElement.getElementsByTagName("knowledgeRepositories");
60
		if (textsList.getLength() == 0) {
61
			Element knowledgeRepositories = corpusElement.getOwnerDocument().createElement("knowledgeRepositories");
62
			corpusElement.appendChild(knowledgeRepositories);
63
			return knowledgeRepositories;
64
		} else {
65
			return (Element) textsList.item(0);
66
		}
67
	}
68

  
69
	/**
70
	 * 
71
	 * Lazy load KR
72
	 * 
73
	 * @param name KnowledgeRepository's name
74
	 * @return may return null if the KnowledgeRepository does not exist
75
	 */
76
	public static KnowledgeRepository getKnowledgeRepository(Corpus corpus, String name) {
77
		return KnowledgeRepositoryManager.getKnowledgeRepository(name, corpus.getMainCorpus());
78
	}
79

  
80
	/**
81
	 * Utility method to get a knowledge repository configuration
82
	 * 
83
	 * @param name the repository name
84
	 * @return the KR configuration map
85
	 */
86
	public static HashMap<String, HashMap<String, ?>> getKnowledgeRepositoryConfiguration(String name, Element e) {
87
		HashMap<String, HashMap<String, ?>> repConfiguration = new HashMap<String, HashMap<String, ?>>();
88

  
89
		HashMap<String, String> access = new HashMap<String, String>();
90
		HashMap<String, HashMap<String, String>> strings = new HashMap<String, HashMap<String, String>>();
91
		HashMap<String, HashMap<String, String>> fields = new HashMap<String, HashMap<String, String>>();
92

  
93
		if (e == null) return null;
94

  
95
		access.put(SQLKnowledgeRepository.NAME, e.getAttribute(SQLKnowledgeRepository.NAME));
96
		access.put("version", e.getAttribute("version"));
97
		access.put("mode", e.getAttribute("mode"));
98
		access.put(SQLKnowledgeRepository.TYPE_URL, e.getAttribute(SQLKnowledgeRepository.TYPE_URL));
99
		access.put(SQLKnowledgeRepository.TYPE_RESURL, e.getAttribute(SQLKnowledgeRepository.TYPE_RESURL));
100
		access.put(SQLKnowledgeRepository.TYPE_TYPEURL, e.getAttribute(SQLKnowledgeRepository.TYPE_TYPEURL));
101
		access.put(SQLConnection.SQL_ADDRESS, e.getAttribute(SQLConnection.SQL_ADDRESS));
102
		access.put(SQLConnection.SQL_DRIVER, e.getAttribute(SQLConnection.SQL_DRIVER));
103
		access.put(SQLConnection.SQL_PORT, e.getAttribute(SQLConnection.SQL_PORT));
104
		access.put(SQLConnection.SQL_USER, e.getAttribute(SQLConnection.SQL_USER));
105
		access.put(SQLConnection.SQL_PASSWORD, e.getAttribute(SQLConnection.SQL_PASSWORD));
106

  
107
		//<type id="" name> ... </type>
108
		NodeList typesList = e.getElementsByTagName("type");
109
		for (int i = 0 ; i < typesList.getLength() ; i++) {
110
			//<field type="xxx">yyy</field>
111
			Element typeElement = (Element)typesList.item(i);
112
			String type_id = ""+typeElement.getAttribute(SQLKnowledgeRepository.TYPE_ID);
113
			
114
			String type_name = typeElement.getAttribute(SQLKnowledgeRepository.NAME);
115
			if (type_name == null || type_name.length() == 0) type_name = type_id;
116
			
117
			String type_url = ""+typeElement.getAttribute(SQLKnowledgeRepository.TYPE_URL);
118
			
119
			String type_size = typeElement.getAttribute(SQLKnowledgeRepository.TYPE_SIZE);
120
			if (type_size == null || type_size.length() == 0) type_size = "SMALL"; // show all by default 
121
			type_size = type_size.toUpperCase();
122
			
123
			String type_effect = typeElement.getAttribute(SQLKnowledgeRepository.TYPE_EFFECT);
124
			if (type_effect == null || type_effect.length() == 0) type_effect = "SEGMENT"; // segment annotation by default 
125
			type_effect = type_effect.toUpperCase();
126

  
127
			HashMap<String, String> hashFields = new HashMap<String, String>();
128
			NodeList fieldsList = typeElement.getElementsByTagName("field"); // contains KR type specific properties
129
			for (int j = 0 ; j < fieldsList.getLength() ; j++) {
130
				Element fieldElement = (Element)fieldsList.item(j);
131
				hashFields.put(fieldElement.getAttribute("type"), fieldElement.getTextContent());
132
			}
133

  
134
			hashFields.put(SQLKnowledgeRepository.TYPE_URL, type_url);
135
			hashFields.put(SQLKnowledgeRepository.NAME, type_name);
136
			hashFields.put(SQLKnowledgeRepository.TYPE_SIZE, type_size);
137
			hashFields.put(SQLKnowledgeRepository.TYPE_EFFECT, type_effect);
138
			fields.put(type_id, hashFields);
139
		}
140
		
141
		NodeList stringsList = e.getElementsByTagName("strings");
142
		for (int i = 0 ; i < stringsList.getLength() ; i++) {
143
			Element stringsElement = (Element)stringsList.item(i);
144
			String lang = stringsElement.getAttribute("lang");
145
			if (lang == null) lang = "en"; // default lang is "en"
146
			
147
			HashMap<String, String> values = new HashMap<String, String>();
148
			strings.put(lang, values);
149
			
150
			NodeList stringList = stringsElement.getElementsByTagName("string");
151
			for (int j = 0 ; j < stringList.getLength() ; j++) {
152
				Element stringElement = (Element)stringList.item(j);
153
				String key = stringElement.getAttribute("key");
154
				if (key != null) {
155
					values.put(key, stringElement.getTextContent());
156
				}
157
			}
158
		}
159

  
160
		repConfiguration.put(KNOWLEDGE_ACCESS, access);
161
		repConfiguration.put(KNOWLEDGE_TYPES, fields);
162
		repConfiguration.put(KNOWLEDGE_STRINGS, strings);
163
		
164
		return repConfiguration;
165
	}
166

  
167
	public static Element getKnowledgeRepositoryElement(BaseParameters params, String name) {
168
		Element rElement = getKnowledgeRepositoriesElement(params.getCorpusElement());
169
		NodeList repositoriesList = rElement.getElementsByTagName("repository");
170
		for (int i = 0 ; i < repositoriesList.getLength() ; i++) {
171
			Element e = ((Element)repositoriesList.item(i));
172
			if (name.equals(e.getAttribute("name"))) {
173
				return e;
174
			}
175
		}
176

  
177
		return null;
178
	}
179
	
180

  
181
	/**
182
	 * Utility method to get a knowledge repository configuration
183
	 * 
184
	 * @param params
185
	 * @return the repository names
186
	 */
187
	public static List<String> getKnowledgeRepositoryNames(BaseParameters params) {
188
		ArrayList<String> names = new ArrayList<String>();
189
		Element corpusElement = params.getCorpusElement();
190
		if (corpusElement == null) return names;
191
		
192
		Element rElement = getKnowledgeRepositoriesElement(corpusElement);
193
		if (rElement == null) return names;
194
		
195
		NodeList repositoriesList = rElement.getElementsByTagName("repository");
196
		for (int i = 0 ; i < repositoriesList.getLength() ; i++) {
197
			names.add(((Element)repositoriesList.item(i)).getAttribute("name"));
198
		}
199
		
200
		if (names.size() == 0) names.add(params.getCorpusName()); // add default KR = properties & structure properties
201
		return names;
202
	}
203
	
204
	/**
205
	 * 
206
	 * @param corpus the corpus
207
	 * @return the repository names
208
	 */
209
	public static List<String> getKnowledgeRepositoryNames(Corpus corpus) {
210
		if (corpus == null) return new ArrayList<String>();
211
		Base base = corpus.getBase();
212
		if (base == null) return new ArrayList<String>();
213
		BaseParameters params = base.getBaseParameters();
214
		return getKnowledgeRepositoryNames(params);
215
	}
216
	
217
	/**
218
	 * 
219
	 * @param corpus
220
	 * @return true if there are annotations to save for this corpus
221
	 */
222
	public static boolean needToSaveAnnotations(MainCorpus corpus) {
223
		AnnotationManager am = ams.get(corpus);
224
		if (am != null) 
225
			return am.hasChanges();
226
		else
227
			return false;
228
	}
229
	
230
	public void closeAnnotationManager(MainCorpus corpus) {
231
		AnnotationManager am = ams.get(corpus);
232
		if (am != null) {
233
			//System.out.println("  Closing AM...");
234
			am.closeAll();
235
		}
236
	}
237
	
238
	public static Element createKnowledgeRepositoryElement(BaseParameters params, String name) {
239
		Element rElement = getKnowledgeRepositoriesElement(params.getCorpusElement());
240
		Element e = rElement.getOwnerDocument().createElement("repository");
241
		e.setAttribute("name", name);
242
		e.setAttribute("mode", "file");
243
		e.setAttribute("user", "false");
244
		e.setAttribute("password", "false");
245
		e.setAttribute("version", "0");
246
		
247
		rElement.appendChild(e);
248
		return e;
249
	}
250
	
251
	@Override
252
	public String getName() {
253
		return "KR Annotation engine";
254
	}
255

  
256
	@Override
257
	public boolean getState() {
258
		return true;
259
	}
260

  
261
	@Override
262
	public boolean initialize() { // nothing to load right now, the kr and am are lazy
263
		return true;
264
	}
265

  
266
	@SuppressWarnings("unchecked")
267
	public boolean[] mustLoginToKnowledgeRepository(Corpus corpus, String kr_name) {
268
		return KnowledgeRepositoryManager.mustLoginToKnowledgeRepository(kr_name, corpus.getMainCorpus());
269
	}
270

  
271
	@Override
272
	public boolean start(IProgressMonitor monitor) {
273
		return true;
274
	}
275

  
276
	@Override
277
	public boolean stop() {
278
		boolean success = true; 
279
		for (KnowledgeRepository kr : KnowledgeRepositoryManager.getKnowledgeRepositories()) {
280
			try {
281
				if (kr.getJPAManager().isOpen())
282
					kr.getJPAManager().close();
283
			} catch(Exception e) {
284
				System.out.println("Error: cannot close KR "+kr+": "+e.getLocalizedMessage());
285
				Log.printStackTrace(e);
286
				success = false;
287
			}
288
		}
289
		for (AnnotationManager am : ams.values()) {
290
			try {
291
				if (am.isOpen())
292
					am.closeAll();
293
			} catch(Exception e) {
294
				System.out.println("Error: cannot close AM "+am+": "+e.getLocalizedMessage());
295
				Log.printStackTrace(e);
296
				success = false;
297
			}
298
		}
299
		
300
		return success;
301
	}
302
}
tmp/org.txm.annotation.core/src/org/txm/annotation/core/PropertiesRecoder.java (revision 853)
1
package org.txm.annotation.core;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.util.ArrayList;
6
import java.util.HashSet;
7
import java.util.LinkedHashMap;
8

  
9
import javax.xml.stream.XMLStreamException;
10

  
11
import org.txm.importer.StaxIdentityParser;
12

  
13
public class PropertiesRecoder extends StaxIdentityParser {
14

  
15
	public PropertiesRecoder(File xmlFile, HashSet<String> fromProperties, HashSet<String> toProperties) throws IOException, XMLStreamException {
16
		super(xmlFile);
17
	}
18
	boolean startW = false, startAna = false;
19
	ArrayList<String[]> anaValues = new ArrayList<String[]>();
20
	String[] currentAnaValues = {"","",""};
21
	StringBuilder anaValue = new StringBuilder();
22
	String EMPTY = "";
23

  
24
	@Override
25
	public void processStartElement() throws XMLStreamException, IOException {
26
		if (localname.equals("w")) {
27
			startW = true;
28
			anaValues.clear();
29
			super.processStartElement();
30
		} else if (localname.equals("ana")) {
31
			if (startW) {
32
				startAna = true;
33
				anaValue.setLength(0);
34
				currentAnaValues[0] = EMPTY;
35
				currentAnaValues[1] = EMPTY;
36
				currentAnaValues[2] = EMPTY;
37
				for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
38
					if (parser.getAttributeLocalName(i).equals("resp")) {
39
						currentAnaValues[0] = parser.getAttributeValue(i);
40
					} else if (parser.getAttributeLocalName(i).equals("type")) {
41
						currentAnaValues[1] = parser.getAttributeValue(i);
42
					}
43
				}
44
			}
45
		} else {
46
			super.processStartElement();
47
		}
48
	}
49

  
50
	@Override
51
	public void processCharacters() throws XMLStreamException {
52
		if (startAna) {
53
			anaValue.append(parser.getText());
54
		} else {
55
			super.processCharacters();
56
		}
57
	};
58

  
59
	@Override
60
	public void processEndElement() throws XMLStreamException {
61
		if (localname.equals("w")) {
62
			startW = false;
63

  
64
			for (String[] values : anaValues) {
65
				try {
66
					writer.writeStartElement("txm:ana");
67
					writer.writeAttribute("resp", values[0]);
68
					writer.writeAttribute("type", values[1]);
69
					writer.writeCharacters(values[2]);
70
					writer.writeEndElement();
71
				} catch (XMLStreamException e) {
72
					// TODO Auto-generated catch block
73
					e.printStackTrace();
74
				}
75
			}
76

  
77
			super.processEndElement();
78
		} else if (localname.equals("ana")) {
79
			if (startW) {
80
				currentAnaValues[2] = anaValue.toString();
81
				anaValues.add(currentAnaValues);
82
				startAna = false;
83
			}
84
		} else {
85
			super.processEndElement();
86
		}
87
	}
88

  
89
	public static void main(String args[]) {
90
		try {
91
			File xmlFile = new File("/home/mdecorde/TEMP/test.xml");
92
			File outfile = new File("/home/mdecorde/TEMP/test-o.xml");
93
			HashSet<String> fromProperties = new HashSet<String>();
94
			HashSet<String> toProperties = new HashSet<String>();
95

  
96
			PropertiesRecoder recoder = new PropertiesRecoder(xmlFile, fromProperties, toProperties);
97
			System.out.println("Result: "+recoder.process(outfile));
98
		} catch(Exception e) {
99
			e.printStackTrace();
100
		}
101
	}
102
}
tmp/org.txm.annotation.core/src/org/txm/annotation/core/AnnotationComparator.java (revision 853)
1
package org.txm.annotation.core;
2

  
3
import java.text.DateFormat;
4
import java.text.SimpleDateFormat;
5
import java.util.Comparator;
6
import java.util.StringTokenizer;
7

  
8
public class AnnotationComparator implements Comparator<Annotation> {
9

  
10
	public int compare(Annotation a1, Annotation a2) {
11
	    // comparer e1 et e2
12
		
13
		DateFormat dateformat =  new SimpleDateFormat("yyyy-MM-dd");
14
		//System.out.println("Date 1 : "+a1.getDate() + " => "+dateformat.format(a1.getDate()));
15
		//System.out.println("Date 2 : "+a2.getDate() + " => "+dateformat.format(a2.getDate()));
16
		StringTokenizer tokenizer1 = new StringTokenizer(dateformat.format(a1.getDate()), "-");
17
		StringTokenizer tokenizer2 = new StringTokenizer(dateformat.format(a2.getDate()), "-");
18
		
19
		for(int i = 0 ; i < 2 && tokenizer1.hasMoreTokens() && tokenizer2.hasMoreTokens() ; ++i) {
20
			String token1 = tokenizer1.nextToken();
21
			String token2 = tokenizer2.nextToken();
22
			int valint1 = new Integer(token1).intValue();
23
			int valint2 = new Integer(token2).intValue();
24
			//System.out.println(valint1+" | "+valint2);
25
			if (valint1<valint2){
26
				return -1;
27
			}else {
28
				if(valint1>valint2){
29
					return 1;
30
				}else {
31
					//System.out.println("idem ["+i+"]");
32
				}
33
				
34
			}
35
		}
36
		
37
		return 0;
38
			
39
	  }
40
	
41
	public static void main(String[] args) {
42
		AnnotationComparator comp = new AnnotationComparator();
43
		Annotation a1 = new Annotation("truc", "bidule", 3, 10);
44
		Annotation a2 = new Annotation("truc", "bidule", 6, 8);
45
		int ret = comp.compare(a1, a2);
46
		System.out.println("Le retour est : "+ret);
47
	}
48
}
tmp/org.txm.annotation.core/src/org/txm/annotation/core/DatabasePersistenceManager.java (revision 853)
1
package org.txm.annotation.core;
2

  
3
import java.util.HashMap;
4

  
5
import javax.persistence.EntityManager;
6

  
7
public class DatabasePersistenceManager {
8

  
9
	//Object can be Corpus or KnowledgeRepository
10
	protected HashMap<Object, EntityManager> managers;
11
	public static final String PERSISTENCE_UNIT_NAME = "HSQLKRPERSISTENCE";
12
	public static String ACCESS_SQL = "sql";
13
	public static String ACCESS_FILE = "file";
14
	public static String ACCESS_SPARQL = "sparql";
15

  
16
	/**
17
	 * Instantiates a new database manager.
18
	 */
19
	public DatabasePersistenceManager() {
20
		managers = new HashMap<Object, EntityManager>() ;
21
	}
22

  
23
	/**
24
	 * The Object can be a Corpus or a KnowledgeRepository
25
	 * @param obj
26
	 * @return
27
	 */
28
	public EntityManager getJPAEntityManager(Object obj){
29
		if (managers.containsKey(obj)) {
30
			return managers.get(obj);
31
		}
32
		return null;
33
	}
34

  
35
	public void closeManager(Object key) {
36
		if (!managers.keySet().contains(key)) return;
37

  
38
		managers.get(key).close();
39
		managers.remove(key);
40
	}
41

  
42
	public void closeAllManagers() {
43
		for (Object key : managers.keySet()) {
44
			EntityManager m = managers.get(key);
45
			m.flush();
46
			m.createNativeQuery("SHUTDOWN;");
47
			m.close();
48
		}
49
		managers.clear();
50
	}
51
}
tmp/org.txm.annotation.core/src/org/txm/annotation/core/AnnotationManager.java (revision 853)
1
package org.txm.annotation.core;
2

  
3
import java.io.File;
4
import java.util.ArrayList;
5
import java.util.Arrays;
6
import java.util.HashMap;
7
import java.util.List;
8

  
9
import org.eclipse.core.runtime.IProgressMonitor;
10
import org.txm.annotation.core.repository.AnnotationEffect;
11
import org.txm.annotation.core.repository.AnnotationType;
12
import org.txm.annotation.core.repository.TypedValue;
13
import org.txm.annotation.core.storage.temporary.TemporaryAnnotationManager;
14
import org.txm.core.engines.Engine;
15
import org.txm.searchengine.cqp.corpus.MainCorpus;
16
import org.txm.searchengine.cqp.corpus.query.Match;
17
import org.txm.utils.logger.Log;
18

  
19
/**
20
 * Manage annotations and is able to return annotation saved in JPA
21
 * 
22
 * @author mdecorde
23
 *
24
 */
25
public class AnnotationManager implements Engine {
26

  
27
	MainCorpus corpus;
28
	TemporaryAnnotationManager tempManager;
29
	CQPAnnotationManager cqpManager;
30

  
31
	/**
	 * Creates the manager of a corpus. The temporary and CQP managers are only created by
	 * {@link #initialize()}.
	 *
	 * @param mainCorpus the corpus whose annotations are managed
	 */
	public AnnotationManager(MainCorpus mainCorpus) {
		corpus = mainCorpus;
	}
34

  
35
	public TemporaryAnnotationManager getTemporaryManager(){
36
		return tempManager;
37
	}
38
	
39
	public CQPAnnotationManager getCQPManager(){
40
		return cqpManager;
41
	}
42
	
43
	public boolean saveAnnotations() throws Exception{
44
		List<Annotation> annots = tempManager.getAnnotations();
45
		if (annots.isEmpty()) {
46
			System.out.println("No annotation to save. Aborting.");
47
			return true;
48
		}
49

  
50
		AnnotationWriter writer = new AnnotationWriter(corpus);
51
		if (writer.writeAnnotations(annots)) {
52
			Log.info("Annotations succesfully written. Deleting temporary annotations...");
53
			tempManager.deleteAnnotations();
54

  
55
			return true;
56
		}
57
		return false;
58
	}
59

  
60
	public boolean exportAnnotationsToSyMoGIH(File resultZipFile) throws Exception{
61
		AnnotationWriter writer = new AnnotationWriter(corpus);
62
		
63
		if (writer.writeAnnotationsInStandoff(resultZipFile)) {
64
			Log.info("Annotations succesfully written in "+resultZipFile);
65
			return true;
66
		}
67
		return false;
68
	}
69
	
70
	//TODO: not ended?
71
	/**
72
	 * Deletes the annotations stored in the temporary annotation manager
73
	 * @param type
74
	 * @param job
75
	 * @return
76
	 * @throws Exception
77
	 */
78
	public boolean deleteAnnotations(AnnotationType type, IProgressMonitor job) throws Exception {
79
		List<Annotation> temporaryAnnotations = null;
80
		List<Annotation> cqpAnnotations = null;
81
		try {
82
			temporaryAnnotations = tempManager.getAnnotations(type);
83
			tempManager.getEntityManager().getTransaction().begin();
84
			for (Annotation a : temporaryAnnotations){
85
				if (job != null && job.isCanceled()) {
86
					System.out.println("Delete annotation canceled.");
87
					return false;
88
				}
89
				tempManager.deleteAnnotation(type, a.getStart(), a.getEnd());
90
			}
91
			tempManager.getEntityManager().getTransaction().commit();
92
			
93
			cqpAnnotations = cqpManager.getAnnotations(type);
94
			tempManager.getEntityManager().getTransaction().begin();
95
			for (Annotation a : cqpAnnotations) {
96
				if (job != null && job.isCanceled()) {
97
					System.out.println("Delete annotation canceled.");
98
					return false;
99
				}
100
				String value = cqpManager.getCQPAnnotationValue(a.getStart(), a.getEnd(), type);
101
				if (value != null) {
102
					tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()),  a.getStart(), a.getEnd());
103
				} else {
104
					tempManager.deleteAnnotationNoCommit(type, a.getStart(), a.getEnd());
105
				}
106
			}
107
			tempManager.getEntityManager().getTransaction().commit();
108
		} catch(Exception e) {
109
			e.printStackTrace();
110
			return false;
111
		}
112
		return true;
113
	}
114
	
115
	public boolean deleteAnnotations(AnnotationType type, List<Match> matches, IProgressMonitor job) throws Exception {
116
		try {
117
			tempManager.getEntityManager().getTransaction().begin();
118
			for (Match m : matches) {
119
				if (job != null && job.isCanceled()) {
120
					System.out.println("Delete annotation canceled.");
121
					return false;
122
				}
123
				
124
				String value = cqpManager.getCQPAnnotationValue(m.getStart(), m.getEnd(), type);
125

  
126
				if (value != null) {
127
					tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()),  m.getStart(), m.getEnd());
128
				} else {
129
					tempManager.deleteAnnotationNoCommit(type, m.getStart(), m.getEnd());
130
				}
131
			}
132
			tempManager.getEntityManager().getTransaction().commit();
133
		} catch (Exception e) {
134
			e.printStackTrace();
135
			return false;
136
		}
137
		return true;
138
	}
139

  
140
	/**
141
	 * Returns the annotation saved in the temporary database and in the CQP corpus indexes
142
	 * 
143
	 * CQP Annotations must be shadowed by temporary annotations of the same type and positions
144
	 */
145
	public List<Annotation> getAnnotationsForMatches(AnnotationType type, List<Match> matches, boolean overlap) {
146
		List<Annotation> temporaryAnnotations = null;
147
		List<Annotation> resultAnnotations = new ArrayList<Annotation>();
148
		try {
149
			temporaryAnnotations = tempManager.getAnnotations(type, matches, null, false, overlap);
150
			temporaryAnnotations = tempManager.getAnnotationsForMatches(matches, temporaryAnnotations, overlap);
151
			
152
			List<Annotation> cqpAnnotations = cqpManager.getAnnotationsForMatches(matches, type, overlap);
153

  
154
		//	System.out.println("Temporary annotations: "+temporaryAnnotations);
155
		//	System.out.println("CQP annotations: "+cqpAnnotations);
156
			if (cqpAnnotations.size() != matches.size() || temporaryAnnotations.size() != matches.size()) {
157
				System.out.println("ERROR in getAnnotationsForMatches methods! ");
158
				return new ArrayList<Annotation>(matches.size());
159
			}
160
			// merge the 2 results
161
			for (int i = 0 ; i < matches.size() ; i++) {
162
				if (cqpAnnotations.get(i) == null && temporaryAnnotations.get(i) == null) {
163
					resultAnnotations.add(null);
164
				} else if (temporaryAnnotations.get(i) != null) {
165
					resultAnnotations.add(temporaryAnnotations.get(i));
166
				} else if (cqpAnnotations.get(i) != null) {
167
					resultAnnotations.add(cqpAnnotations.get(i));
168
				} else {
169
					resultAnnotations.add(null);
170
				}
171
			}
172
		} catch (Exception e) {
173
			e.printStackTrace();
174
			return new ArrayList<Annotation>(matches.size());
175
		}
176
		return resultAnnotations;
177
	}
178

  
179
	public void clearInstance() {
180
		try {
181
			tempManager.close();
182
		} catch(Exception e) {
183
			System.out.println("Fail to clear AnnotationManager instance: "+e);
184
			org.txm.utils.logger.Log.printStackTrace(e);
185
		}
186
	}
187

  
188
	public void checkData() {
189
		try {
190
			tempManager.checkData();
191
		} catch (Exception e) {
192
			// TODO Auto-generated catch block
193
			e.printStackTrace();
194
		}
195
	}
196

  
197
	public boolean hasChanges() {
198
		return tempManager.hasChanges();
199
	}
200

  
201
	/**
202
	 * 
203
	 * @param annotSelectedType not null
204
	 * @param annotSelectedTypedValue not null
205
	 * @param matches not null
206
	 * @param job may be null
207
	 * @return
208
	 */
209
	public HashMap<Match,List<Annotation>> createAnnotations(AnnotationType annotSelectedType,
210
			TypedValue annotSelectedTypedValue, List<Match> matches, IProgressMonitor job) {
211

  
212
		HashMap<Match,List<Annotation>> allAnnotationsThatCollides = new HashMap<Match, List<Annotation>>();
213
		tempManager.getEntityManager().getTransaction().begin(); // warning
214
		for (Match match : matches) {
215
			allAnnotationsThatCollides.put(match, new ArrayList<Annotation>());
216
			
217
			if (job != null && job.isCanceled()) { // check if user canceled the job
218
				System.out.println("Affect annotation canceled.");
219
				return null;
220
			}
221
			
222
			try {
223
				List<Annotation> cqpAnnotations = null;
224
				if (annotSelectedType.getEffect().equals(AnnotationEffect.SEGMENT)) {
225
					cqpAnnotations = cqpManager.getAnnotations(null, match, null, true); // get all annotations
226
					// remove A)the wrapping annotations and B) the annotation with same type and same positions
227
					for (int i = 0 ; i < cqpAnnotations.size() ; i++) {
228
						Annotation a = cqpAnnotations.get(i);
229

  
230
						// exact match + exact type
231
						if (a.getType().equals(annotSelectedType.getId()) && a.getStart() == match.getStart() && a.getEnd() == match.getEnd()) {
232
							cqpAnnotations.remove(i);
233
							i--;
234
						} else if (!a.getType().equals(annotSelectedType.getId()) && (// different type and inner or outer wrap
235
								(a.getStart() <= match.getStart() && match.getEnd() <= a.getEnd()) ||
236
								(match.getStart() <= a.getStart() && a.getEnd() <= match.getEnd())
237
								)) {
238
							cqpAnnotations.remove(i);
239
							i--;
240
						}
241
					}
242
				} else {
243
					// no need to test collision (AnnotationType=TOKEN)
244
					cqpAnnotations = new ArrayList<Annotation>();
245
				}
246
				
247
				if (cqpAnnotations.size() > 0) {
248
					allAnnotationsThatCollides.get(match).addAll(cqpAnnotations);
249
					continue; // don't create annotation, process next match
250
				} else { // test with temporary annotation manager 
251
					List<Annotation> tempAnnotations = null;
252
					if (match.getTarget() >= 0) { // the @ operator has been used, annotate only the @position
253
						tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, match.getTarget(), match.getTarget());
254
					} else {
255
						tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, match.getStart(), match.getEnd());
256
					}
257
					if (tempAnnotations.size() > 0)
258
						allAnnotationsThatCollides.get(match).addAll(tempAnnotations);
259
				}
260
			} catch (Exception e) {
261
				Log.printStackTrace(e);
262
				System.out.println("Error during annotation creation: "+e);
263
			}
264
			
265
			if (allAnnotationsThatCollides.get(match).size() == 0) allAnnotationsThatCollides.remove(match); // keep only colision lists
266
		}
267
		tempManager.getEntityManager().getTransaction().commit(); // warning
268
		// test if there are CQP annotations for the 
269
		return allAnnotationsThatCollides;
270
	}
271

  
272
	public List<Annotation> getAnnotations(AnnotationType type, int start, int end, boolean overlap) {
273
		List<Annotation> temporaryAnnotations = null;
274
		List<Annotation> cqpAnnotations = null;
275
		try {
276
			temporaryAnnotations = tempManager.getAnnotations(type, Arrays.asList(new Match(start, end)), null, false, overlap);
277
			cqpAnnotations = cqpManager.getAnnotations(type, start, end, overlap);
278
			
279
			int i = 0;
280
			for (Annotation a : cqpAnnotations) {
281
				while (temporaryAnnotations.get(i).getStart() < a.getStart()) {
282
					i++;
283
				}
284
				temporaryAnnotations.add(i, a);
285
			}
286
		} catch(Exception e) {
287
			
288
		}
289
		return temporaryAnnotations;
290
	}
291
	
292
	public List<Annotation> getAnnotations(AnnotationType type, int i, int j) {
293
		return getAnnotations(type, i, j, false);
294
	}
295

  
296
	public void closeAll() {
297
		Log.info("Closing annotation manager of "+corpus);
298
		tempManager.close();
299
		cqpManager.close();
300
	}
301

  
302
	public boolean isOpen() {
303
		return tempManager.getEntityManager() != null && tempManager.getEntityManager().isOpen();
304
	}
305

  
306
	@Override
307
	public String getName() {
308
		return "Annotation";
309
	}
310

  
311
	@Override
312
	public boolean getState() {
313
		return isOpen();
314
	}
315

  
316
	@Override
317
	public boolean initialize() throws Exception {
318
		tempManager = new TemporaryAnnotationManager(corpus);
319
		cqpManager = new CQPAnnotationManager(corpus);
320
		return false;
321
	}
322

  
323
	@Override
324
	public boolean start(IProgressMonitor arg0) throws Exception {
325
		return true;
326
	}
327

  
328
	@Override
329
	public boolean stop() throws Exception {
330
		return true;
331
	}
332
}
tmp/org.txm.annotation.core/src/org/txm/annotation/core/AnnotationPK.java (revision 853)
1
package org.txm.annotation.core;
2

  
3
import java.io.Serializable;
4

  
5
import javax.persistence.*;
6

  
7
@Embeddable
8
public class AnnotationPK implements Serializable {
9

  
10
	private static final long serialVersionUID = -2360693333015275209L;
11

  
12
	//corresponding to the start and end positions (in the corpus)
13
	private int startpos;
14
	private int endpos;
15
	private String refType;
16

  
17
	public AnnotationPK() {
18
	}
19

  
20
	public AnnotationPK(int start, int end, String refType) {
21
		this.startpos = start;
22
		this.endpos = end;
23
		this.refType = refType;
24
	}
25

  
26
	public void setRefType(String refType) {
27
		this.refType = refType;
28
	}
29

  
30
	public void setStartPosition(int start) {
31
		this.startpos = start;
32
	}
33

  
34
	public void setEndPosition(int end) {
35
		this.endpos = end;
36
	}
37

  
38
	public String getRefType() {
39
		return refType;
40
	}
41

  
42
	public int getStartPosition() {
43
		return startpos;
44
	}
45

  
46
	public int getEndPosition() {
47
		return endpos;
48
	}
49

  
50
	public boolean equals(Object obj) {
51
		if (obj == null) return false;
52
		if (obj == this) return true;
53
		if (!(obj instanceof AnnotationPK)) return false;
54

  
55
		AnnotationPK other = (AnnotationPK) obj;
56
		return startpos == other.startpos && endpos == other.endpos&& refType.equals(other.refType);
57
	}
58

  
59
	public int hashCode() {
60
		return refType.hashCode()+startpos+endpos;
61
	}
62

  
63
	public String toString() {
64
		return getRefType() + "["+getStartPosition()+"-"+getEndPosition()+"]" ;
65
	}
66
}
tmp/org.txm.annotation.core/src/org/txm/annotation/core/AnnotationSyMoGIHWriter.java (revision 853)
1
package org.txm.annotation.core;
2

  
3
import java.io.File;
4
import java.io.FileWriter;
5
import java.io.IOException;
6
import java.net.MalformedURLException;
7
import java.util.ArrayList;
8
import java.util.HashMap;
9
import java.util.HashSet;
10
import java.util.List;
11

  
12
import javax.xml.stream.XMLInputFactory;
13
import javax.xml.stream.XMLOutputFactory;
14
import javax.xml.stream.XMLStreamException;
15
import javax.xml.stream.XMLStreamWriter;
16

  
17
import org.txm.annotation.core.repository.AnnotationType;
18
import org.txm.importer.StaxIdentityParser;
19

  
20
/**
 * The Class AnnotationSyMoGIHWriter.
 *
 * @author sgedzelman, mdecorde
 *
 * Copies an XML-TXM file without its annotation elements
 * and creates one XML-TEI SyMoGIH stand-off annotation file per annotation author.
 *
 */
29
public class AnnotationSyMoGIHWriter extends StaxIdentityParser {
30

  
31
	File xmlStandOffDirectory;
32
	boolean debug = false;
33

  
34
	String currentRef ;
35
	String currentAuthor ;
36
	String currentDate;
37
	String currentStartPos ;
38
	String currentEndPos ;
39
	//read xmlFile, to find annotations and update/write to xmlstandofffile
40
	//order annotations by annotator
41
	////// order annotations by date
42
	HashSet<String> types = new HashSet<String>();
43
	ArrayList<String> positions;
44
	HashMap<String, ArrayList<String>> annotationsPositions;
45
	XMLStreamWriter currentWriter;
46
	XMLStreamWriter standoffWriter;
47
	String currentType;
48
	boolean startAnnotation = false;
49
	private String filename;
50

  
51
	// author -> date -> annotation_values
52
	HashMap<String, HashMap<String, ArrayList<String[]>>> allannotations = new HashMap<String, HashMap<String, ArrayList<String[]>>>();
53
	HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>> allannotationspositions = new HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>>();
54
	private String textid;
55

  
56
	/**
	 * Creates a writer for the XML-TXM file of one text.
	 *
	 * @param textid identifier of the text; written as the title of the stand-off files
	 * @param xmlFile the XML-TXM file to read
	 * @param xmlStandOffDirectory directory in which the stand-off files are created
	 * @param types annotation types; the id of each type is an element name treated as an annotation
	 * @param debug debug flag, stored as given
	 * @throws IOException
	 * @throws XMLStreamException
	 */
	public AnnotationSyMoGIHWriter(String textid, File xmlFile, File xmlStandOffDirectory, List<AnnotationType> types, boolean debug) throws IOException, XMLStreamException {
		super(xmlFile.toURI().toURL()); // init reader and writer
		this.xmlStandOffDirectory = xmlStandOffDirectory;
		this.debug = debug;
		this.filename = xmlFile.getName();
		this.textid = textid;
		this.annotationsPositions = new HashMap<String, ArrayList<String>>();
		factory = XMLInputFactory.newInstance();

		// annotation elements are recognized by the type id, not by the type name
		for (AnnotationType annotationType : types) {
			this.types.add(annotationType.getId());
		}
	}
81

  
82
	/*<TEI xmlns="http://www.tei-c.org/ns/1.0">
83
    <teiHeader>
84
        <fileDesc>
85
            <titleStmt>
86
                <title>Title</title>
87
            </titleStmt>
88
            <publicationStmt>
89
                <p>Publication Information</p>
90
            </publicationStmt>
91
            <sourceDesc>
92
                <p>Ce document permet l'annotation sémantique de tous les textes concernant l'association avec des unités de connaissance</p>
93
            </sourceDesc>
94
        </fileDesc>
95
    </teiHeader>
96
    <text>
97
        <body>
98
            <div>
99
                <div>
100
                    <!-- La date dans le header indique la date d'annotation -->
101
                    <head>
102
                        <date type="annotation_date" when="2016-06-16"/>
103
                    </head>
104
                    <span type="identification d'entités nommées" ana="CoAc56389" 
105
                          target="#w_article_baip_1254-0714_1850_num_01_005_974_tei_2152 
106
                                  #w_article_baip_1254-0714_1850_num_01_005_974_tei_2153 
107
                                  #w_article_baip_1254-0714_1850_num_01_005_974_tei_2154 
108
                                  #w_article_baip_1254-0714_1850_num_01_005_974_tei_2155 
109
                                  #w_article_baip_1254-0714_1850_num_01_005_974_tei_2156" />
110
                </div>
111
            </div>
112
         </body>
113
      </text>
114
 </TEI>*/
115

  
116
	protected void processStartElement() throws XMLStreamException, IOException {
117
		//<coac author="gazelledess" ref="CoAc397" start="5" end="5">
118

  
119
		/*if(localname.startsWith("actr")){
120
			System.out.println("Check existence of actr in Corpus !!!! "+localname);
121
		}*/
122
		boolean foundAnnot = false;
123

  
124
		
125
		if (types.contains(localname) && parser.getPrefix().equals("txm")) { // don't write txm annotation elements
126
			//System.out.println(" START "+ localname);
127
			foundAnnot = true;
128
			currentType = localname;
129
			//<txm:actr author="gazelledess" ref="PhileasFogg" date="2016-09-05" start="56" end="57">
130
			currentAuthor = parser.getAttributeValue(null, "author");
131
			currentRef = parser.getAttributeValue(null, "ref");
132
			currentStartPos = parser.getAttributeValue(null, "start");
133
			currentEndPos = parser.getAttributeValue(null, "end");
134
			currentDate = parser.getAttributeValue(null, "date");
135
			//annotation is here
136
			startAnnotation = true;
137
			positions = new ArrayList<String>();
138
			annotationsPositions.put(currentType, positions);
139

  
140
			// initialize allannotations
141
			if (!allannotations.containsKey(currentAuthor)) {
142
				allannotations.put(currentAuthor, new HashMap<String, ArrayList<String[]>>());
143
				allannotationspositions.put(currentAuthor, new HashMap<String, ArrayList<ArrayList<String>>>());
144
			}
145
			HashMap<String, ArrayList<String[]>> authorsAnnotation = allannotations.get(currentAuthor);
146
			HashMap<String, ArrayList<ArrayList<String>>> authorsAnnotationPositions = allannotationspositions.get(currentAuthor);
147
			if (!authorsAnnotation.containsKey(currentDate)) {
148
				authorsAnnotation.put(currentDate, new ArrayList<String[]>());
149
				authorsAnnotationPositions.put(currentDate, new ArrayList<ArrayList<String>>());
150
			}
151

  
152
			storeAnnotation();
153
		}
154

  
155
		if (!foundAnnot) {
156
			super.processStartElement(); /// continue writing in file all elements, except the tags that are now in stand-off files
157

  
158
			// get words ids of the current annotations
159
			if (localname.equals("w") && startAnnotation) {
160
				String id = parser.getAttributeValue(null, "id");
161
				for (String typeIn : annotationsPositions.keySet()) {
162
					positions = annotationsPositions.get(typeIn);
163
					positions.add(id);
164
					//System.out.println("Positions of w id="+posW+" for ["+typeIn+"] ");
165
				}
166
			}
167
		}
168
	}
169

  
170

  
171
	/**
172
	 * ends the current author stand-off file
173
	 * @param currentWriter
174
	 */
175
	private void writeEndStandOffFile(XMLStreamWriter currentWriter){
176
		//System.out.println("writeEndStandOffFile ...");
177

  
178
		try {
179
			currentWriter.writeEndElement(); 
180
			currentWriter.writeEndDocument();
181

  
182
			currentWriter.flush();
183
			currentWriter.close();
184
		} catch (XMLStreamException e) {
185
			e.printStackTrace();
186
		}
187

  
188
	}
189

  
190
	/**
191
	 * Create the stand-off file for one author
192
	 * @param file
193
	 * @return
194
	 */
195
	private XMLStreamWriter writeStartStandOffFile(File file){
196
		//System.out.println("writeStartStandOffFile ...");
197

  
198
		String ns = "http://www.tei-c.org/ns/1.0";    
199
		XMLOutputFactory output = XMLOutputFactory.newInstance();
200
		XMLStreamWriter writer = null ;
201
		try {
202
			writer = output.createXMLStreamWriter(new FileWriter(file));
203
			writer.writeStartDocument();
204
			writer.setPrefix("tei", ns);
205
			writer.setDefaultNamespace(ns);
206

  
207
			writer.writeStartElement("TEI");
208

  
209
			writer.writeStartElement("teiHeader");
210
			writer.writeStartElement("fileDesc");
211

  
212
			writer.writeStartElement("titleStmt");
213
			writer.writeStartElement("title");
214
			writer.writeCharacters(textid); 
215
			writer.writeEndElement(); // title
216
			writer.writeEndElement(); // titleStmt
217

  
218
			writer.writeStartElement("publicationStmt");
219
			writer.writeStartElement("p");
220
			writer.writeCharacters("PUBLICATION INFO à renseigner");
221
			writer.writeEndElement(); // p
222
			writer.writeEndElement(); // publicationStmt
223

  
224
			writer.writeStartElement("sourceDesc");
225
			writer.writeStartElement("p");
226
			writer.writeCharacters("Ce document permet l'annotation sémantique de tous les textes, par auteur");
227
			writer.writeEndElement(); // p
228
			writer.writeEndElement(); // sourceDesc
229

  
230
			writer.writeEndElement(); // </fileDesc>
231
			writer.writeStartElement("encodingDesc");
232
			writer.writeStartElement("projectDesc");
233
			writer.writeStartElement("p");
234
			writer.writeCharacters("Annotations created by "+currentAuthor+", for the use in Symogih XML platform");
235
			writer.writeEndElement(); // p
236
			writer.writeEndElement(); // </projectDesc>
237
			writer.writeEndElement(); // </encodingDesc>
238
			writer.writeEndElement(); // </teiHeader>
239

  
240

  
241
			writer.writeStartElement("text");
242
			writer.writeStartElement("body");
243
			writer.writeCharacters("\n");
244
			writer.writeStartElement("div");
245
			writer.writeCharacters("\n");
246
		} catch (XMLStreamException e) {
247
			// TODO Auto-generated catch block
248
			e.printStackTrace();
249
		} catch (IOException e) {
250
			// TODO Auto-generated catch block
251
			e.printStackTrace();
252
		}
253
		return writer;
254
	}
255

  
256
	private void storeAnnotation() {
257
		allannotations.get(currentAuthor).get(currentDate).add(new String[]{currentDate, currentRef, currentType});
258
	}
259

  
260
	private void storeAnnotationPositions() {
261
		allannotationspositions.get(currentAuthor).get(currentDate).add(positions);
262
	}
263

  
264
	/**
265
	 * write stand-off annotation
266
	 */
267
	private void writeStartAnnotationToStandoffFile(String[] data, ArrayList<String> positions) {
268
		//System.out.println("writeStartAnnotationToStandoffFile ...");
269
		try {
270
			currentWriter.writeStartElement("span");
271
			currentWriter.writeAttribute("type","named_entities_identifications");
272
			currentWriter.writeAttribute("ana", data[1]);
273
			StringBuffer listWids = new StringBuffer();
274
			for (String posW : positions) {
275
				listWids.append("#"+posW+" ");
276
			}
277
			currentWriter.writeAttribute("target", listWids.toString().trim());
278
			currentWriter.writeComment("type="+data[2]);
279
			currentWriter.writeEndElement(); // span
280
			currentWriter.writeCharacters("\n");
281

  
282
		} catch (XMLStreamException e) {
283
			// TODO Auto-generated catch block
284
			e.printStackTrace();
285
		}
286
	}
287

  
288
	protected void processEndElement() throws XMLStreamException {
289
		boolean foundAnnot = false;
290
		if (types.contains(localname) && parser.getPrefix().equals("txm")) { // skip annotation end element
291
			//System.out.println(" END "+ localname);
292
			foundAnnot = true;
293
			//annotation ends here
294
			storeAnnotationPositions();
295

  
296
			if (annotationsPositions.containsKey(localname)) {
297
				annotationsPositions.remove(localname);
298
			}
299

  
300
			startAnnotation = false;
301
		}
302

  
303
		if (!foundAnnot) {
304
			if (localname.equals("TEI")) {
305
				try {
306
					for (String author : allannotations.keySet()) {
307
						System.out.println("    author="+author);
308
						File currentXmlFile = new File(xmlStandOffDirectory, filename.substring(0, filename.length()-4)+"_"+currentAuthor+"_annotations.xml");
309

  
310
						// write start of the stand-off file
311
						currentWriter = writeStartStandOffFile(currentXmlFile);
312

  
313
						// write annotations grouped per date
314
						HashMap<String, ArrayList<String[]>> dates = allannotations.get(author);
315
						HashMap<String, ArrayList<ArrayList<String>>> datesPositions = allannotationspositions.get(author);
316
						for (String date : datesPositions.keySet()) {
317
							ArrayList<String[]> datas = dates.get(date);
318
							ArrayList<ArrayList<String>> positions = datesPositions.get(date);
319

  
320
							currentWriter.writeStartElement("div");
321
							currentWriter.writeCharacters("\n");
322
							currentWriter.writeStartElement("head");
323
							currentWriter.writeStartElement("date");
324
							currentWriter.writeAttribute("type","annotation_date");
325
							currentWriter.writeAttribute("when", date); 
326
							currentWriter.writeEndElement(); // date
327
							currentWriter.writeEndElement(); // head
328
							currentWriter.writeCharacters("\n");
329

  
330
							for (int i = 0 ; i < datas.size() ; i++) {
331
								writeStartAnnotationToStandoffFile(datas.get(i), positions.get(i));
332
							}
333

  
334
							currentWriter.writeEndElement(); // div
335
							currentWriter.writeCharacters("\n");
336
						}
337

  
338
						// write the end of the stand-off file
339
						writeEndStandOffFile(currentWriter);
340
					}
341
				}
342
				catch(Exception e){
343
					System.out.println("Current writer "+currentWriter);
344
				}
345
			}
346
			super.processEndElement();
347
		}
348
	}
349

  
350
	public static void main(String strs[]){
351
		try {
352
		File xmlFile = new File("/home/mdecorde/TXM/corpora/TDM80J/txm/TDM80J", "tdm80j.xml");
353
		File outfile = new File(xmlFile.getParentFile(), "tdm80j-tei.xml");
354

  
355
		File xmlStandOffDirectory = xmlFile.getParentFile();
356
		List<AnnotationType> types = new ArrayList<AnnotationType>();
357
		types.add(new AnnotationType("local","actr_id", "actr"));
358
		AnnotationSyMoGIHWriter annotStdff;
359
		try {
360
			annotStdff = new AnnotationSyMoGIHWriter("TDBM80J", xmlFile, xmlStandOffDirectory, types, true);
361
			annotStdff.process(outfile);
362
		} catch (MalformedURLException e) {
363
			// TODO Auto-generated catch block
364
			e.printStackTrace();
365
		}
366
		} catch(Exception e) {
367
			e.printStackTrace();
368
		}
369
	}
370
}
tmp/org.txm.annotation.core/src/org/txm/annotation/core/AnnotationWriter.java (revision 853)
1
package org.txm.annotation.core;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.util.ArrayList;
6
import java.util.HashMap;
7
import java.util.List;
8
import java.util.logging.Level;
9

  
10
import javax.xml.stream.XMLStreamException;
11

  
12
import org.apache.commons.lang.StringUtils;
13
import org.txm.Toolbox;
14
import org.txm.annotation.core.repository.AnnotationEffect;
15
import org.txm.annotation.core.repository.AnnotationType;
16
import org.txm.annotation.core.repository.KnowledgeRepository;
17
import org.txm.core.preferences.TBXPreferences;
18
import org.txm.core.preferences.TXMPreferences;
19
import org.txm.importer.ValidateXml;
20
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
21
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException;
22
import org.txm.searchengine.cqp.corpus.MainCorpus;
23
import org.txm.searchengine.cqp.serverException.CqiServerError;
24
import org.txm.stat.utils.ConsoleProgressBar;
25
import org.txm.utils.DeleteDir;
26
import org.txm.utils.logger.Log;
27
import org.txm.utils.zip.Zip;
28

  
29

  
30
public class AnnotationWriter {
31

  
32
	MainCorpus corpus;
33
	private List<AnnotationType> types;
34
	private KnowledgeRepository defaultKR;
35

  
36
	/**
	 * Creates a writer for the given corpus, using the first knowledge repository
	 * of the corpus as default repository and all its annotation types.
	 *
	 * @param corpus the corpus whose annotations are written
	 */
	public AnnotationWriter(MainCorpus corpus){
		this.corpus = corpus;
		// the first declared repository is the default one
		this.defaultKR = KRAnnotationEngine.getKnowledgeRepository(
				corpus,
				KRAnnotationEngine.getKnowledgeRepositoryNames(corpus).get(0));
		this.types = this.defaultKR.getAllAnnotationTypes();
	}
42

  
43
	/**
44
	 * process a text to build standoff files
45
	 * 
46
	 * @param textid
47
	 * @param currentXMLFile
48
	 * @param currentXMLStandoffFile
49
	 * @param xmlStandOffDirectory
50
	 * @return
51
	 * @throws IOException
52
	 * @throws CqiServerError
53
	 * @throws CqiClientException
54
	 * @throws InvalidCqpIdException
55
	 * @throws XMLStreamException 
56
	 */
57
	protected boolean writeTextAnnotationToSyMoGIH(String textid, File currentXMLFile, File currentXMLStandoffFile, File xmlStandOffDirectory) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{
58
		System.out.println("  text="+textid);
59
		boolean show_debug = Log.getLevel().intValue() < Level.WARNING.intValue();
60
		
61
		AnnotationSyMoGIHWriter annotationstdoff = new AnnotationSyMoGIHWriter(textid, currentXMLFile, xmlStandOffDirectory, types, show_debug);
62
		
63
		///rather test on the new xml standoff files
64
		if (annotationstdoff.process(currentXMLStandoffFile)) {
65
			if (ValidateXml.test(currentXMLStandoffFile)) { //TODO ALSO check if annotations are well-written 
66
				return true;
67
			} else {
68
				System.out.println("Error: result file "+currentXMLStandoffFile+" is malformed.");
69
			}
70
		} else {
71
			System.out.println("Error: while processing "+currentXMLStandoffFile+" in standoff dir");
72
		}
73
		return false;
74
		
75
	}
76
	
77
	/**
78
	 * Writing annotations in standoff files for each text of the corpus
79
	 * 
80
	 * @param allCorpusAnnotations
81
	 * @return
82
	 * @throws IOException
83
	 * @throws CqiServerError
84
	 * @throws CqiClientException
85
	 * @throws InvalidCqpIdException
86
	 * @throws XMLStreamException 
87
	 */
88
	public boolean writeAnnotationsInStandoff(File resultZipFile) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{
89
		
90
		List<String> textsIds = corpus.getTextsID();
91
		System.out.println("Exporting annotations of "+StringUtils.join(textsIds, ", ")+".");
92

  
93
		File resultDirectory = new File(TXMPreferences.getString(TBXPreferences.USER_TXM_HOME, TBXPreferences.PREFERENCES_NODE), "results/"+corpus.getName()+"_annotations");
94
		DeleteDir.deleteDirectory(resultDirectory);
95
		resultDirectory.mkdirs();
96
		if (!(resultDirectory.exists() && resultDirectory.canWrite())) {
97
			System.out.println("ERROR: could not create/write temporary directory: "+resultDirectory);
98
			return false;
99
		}
100
		
101
		File inputDirectory = corpus.getBaseDirectory();
102
		File corpusTxmDirectory = new File(inputDirectory, "txm/"+corpus.getName());
103
		
104
		System.out.println("Writing annotations XML files in "+resultDirectory);
105
		for (String textid : textsIds) { 
106
			File currentXMLFile = new File(corpusTxmDirectory, textid+".xml");
107
			if (!currentXMLFile.exists()) {
108
				System.out.println("Error: cannot found XML file for text with id="+textid);
109
				return false;
110
			}
111
			File currentXMLStandoffFile = new File(resultDirectory, textid+".xml"); //To Be Changed ?
112
			if (!writeTextAnnotationToSyMoGIH(textid, currentXMLFile, currentXMLStandoffFile, resultDirectory )) {
113
						System.out.println("Error while writing annotations of text "+currentXMLStandoffFile);
114
						return false;
115
			}
116
		}
117
		
118
		Zip.compress(resultDirectory, resultZipFile, new ConsoleProgressBar(1));
119
		DeleteDir.deleteDirectory(resultDirectory);
120
		
121
		System.out.println("Annotations saved in "+resultZipFile.getAbsolutePath());
122
		return resultZipFile.exists();
123
	}
124
	
125
	/**
126
	 * 
127
	 * @param allCorpusAnnotations ordered annotations
128
	 * @return
129
	 * @throws IOException
130
	 * @throws CqiServerError
131
	 * @throws CqiClientException
132
	 * @throws InvalidCqpIdException
133
	 * @throws XMLStreamException 
134
	 */
135
	public boolean writeAnnotations(List<Annotation> allCorpusAnnotations) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{
136
		//MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(c.getName());
137
		System.out.println("Saving "+allCorpusAnnotations.size()+"annotations...");
138

  
139
		int[] end_limits = corpus.getTextEndLimits();
140
		int[] start_limits = corpus.getTextStartLimits();
141
		List<String> textsIds = corpus.getTextsID();
142

  
143
		File inputDirectory = corpus.getBaseDirectory();
144
		File txmDirectory = new File(inputDirectory, "txm/"+corpus.getName());
145

  
146
		ArrayList<Annotation> textAnnotations = new ArrayList<Annotation>();
147
		HashMap<String, ArrayList<Annotation>> annotationsPerTexts = new HashMap<String, ArrayList<Annotation>>();
148

  
149
		int currentText = 0;
150
		File currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
151

  
152
		//store first text
153
		currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
154
		textAnnotations = new ArrayList<Annotation>();
155
		annotationsPerTexts.put(textsIds.get(currentText), textAnnotations);
156

  
157
		// group annotations per text
158
		for (Annotation currentAnnot : allCorpusAnnotations) { // parse all annotations
159
			//System.out.println(" Annotation: "+currentAnnot);
160
			int pos = currentAnnot.getPK().getEndPosition();
161

  
162
			while (pos > end_limits[currentText]) { // while pos is not in the currentText.end 
163
				currentText++;
164
				currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
165
				textAnnotations = new ArrayList<Annotation>();
166
				annotationsPerTexts.put(textsIds.get(currentText), textAnnotations);
167
			}
168

  
169
			textAnnotations.add(currentAnnot);
170
		}
171

  
172
		File tmpXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_tmp");
173
		DeleteDir.deleteDirectory(tmpXMLTXMDirectory);
174
		tmpXMLTXMDirectory.mkdirs();
175
		if (!(tmpXMLTXMDirectory.exists() && tmpXMLTXMDirectory.canWrite())) {
176
			System.out.println("ERROR: could not create directory: "+tmpXMLTXMDirectory);
177
			return false;
178
		}
179

  
180
		File previousXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_previous");
181
		//DeleteDir.deleteDirectory(tmpXMLTXMDirectory);
182
		previousXMLTXMDirectory.mkdirs();
183
		if (!(previousXMLTXMDirectory.exists()  && previousXMLTXMDirectory.canWrite())) {
184
			System.out.println("ERROR: could not create directory: "+previousXMLTXMDirectory);
185
			return false;
186
		}
187

  
188
		System.out.println("Annotations grouped per text for "+annotationsPerTexts.size()+" text"+(annotationsPerTexts.size() > 0?"s":""));
189
		System.out.println(" - Writing temporary XML files in: "+tmpXMLTXMDirectory);
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff