Révision 850
| tmp/org.txm.annotation.kr.core/.classpath (revision 850) | ||
|---|---|---|
| 1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
| 2 |
<classpath> |
|
| 3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/> |
|
| 4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
| 5 |
<accessrules> |
|
| 6 |
<accessrule kind="accessible" pattern="**"/> |
|
| 7 |
</accessrules> |
|
| 8 |
</classpathentry> |
|
| 9 |
<classpathentry kind="src" path="src"/> |
|
| 10 |
<classpathentry kind="lib" path="libs/hsqldb.jar"/> |
|
| 11 |
<classpathentry kind="lib" path="libs/postgresql-9.4.1207.jre6.jar"/> |
|
| 12 |
<classpathentry kind="lib" path="libs/sqlite-jdbc-3.8.11.2.jar"/> |
|
| 13 |
<classpathentry kind="output" path="bin"/> |
|
| 14 |
</classpath> |
|
| 0 | 15 | |
| tmp/org.txm.annotation.kr.core/META-INF/persistence.xml (revision 850) | ||
|---|---|---|
| 1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
| 2 |
<persistence version="2.1" xmlns="http://xmlns.jcp.org/xml/ns/persistence" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/persistence http://xmlns.jcp.org/xml/ns/persistence/persistence_2_1.xsd"> |
|
| 3 |
<persistence-unit name="HSQLKRPERSISTENCE" transaction-type="RESOURCE_LOCAL"> |
|
| 4 |
<provider>org.eclipse.persistence.jpa.PersistenceProvider</provider> |
|
| 5 |
|
|
| 6 |
<class>org.txm.annotation.kr.core.Annotation</class> |
|
| 7 |
<class>org.txm.annotation.kr.core.repository.AnnotationType</class> |
|
| 8 |
<class>org.txm.annotation.kr.core.repository.TypedValue</class> |
|
| 9 |
<!-- |
|
| 10 |
<class>org.txm.functions.dictionary_jpa.EntryId</class> |
|
| 11 |
<class>org.txm.functions.dictionary_jpa.EntryProperty</class> |
|
| 12 |
<class>org.txm.functions.dictionary_jpa.EntryPropertyType</class> |
|
| 13 |
--> |
|
| 14 |
<properties> |
|
| 15 |
<property name="javax.persistence.jdbc.driver" value="org.hsqldb.jdbcDriver"/> |
|
| 16 |
<property name="javax.persistence.jdbc.url" value="jdbc:hsqldb:file:txmdb/db;shutdown=true;hsqldb.write_delay=false;hsqldb.lock_file=false;hsqldb.applog=0;hsqldb.sqllog=0"/> |
|
| 17 |
<property name="javax.persistence.jdbc.user" value="SA"/> |
|
| 18 |
|
|
| 19 |
<!-- <property name="eclipselink.logging.parameters" value="false"/> |
|
| 20 |
<property name="eclipselink.logging.level" value="ON" /> |
|
| 21 |
|
|
| 22 |
|
|
| 23 |
<property name="eclipselink.jdbc.read-connections.min" value="1" /> |
|
| 24 |
<property name="eclipselink.jdbc.write-connections.min" value="1" /> |
|
| 25 |
<property name="eclipselink.jdbc.batch-writing" value="JDBC" /> |
|
| 26 |
--> |
|
| 27 |
|
|
| 28 |
<!-- Logging |
|
| 29 |
<property name="eclipselink.logging.file" value="output.log"/> |
|
| 30 |
<property name="eclipselink.logging.logger" value="JavaLogger"/> |
|
| 31 |
--> |
|
| 32 |
|
|
| 33 |
<!-- |
|
| 34 |
<property name="eclipselink.logging.level" value="FINE" /> |
|
| 35 |
<property name="eclipselink.logging.timestamp" value="false" /> |
|
| 36 |
<property name="eclipselink.logging.session" value="false" /> |
|
| 37 |
<property name="eclipselink.logging.thread" value="false" /> |
|
| 38 |
--> |
|
| 39 |
|
|
| 40 |
<!-- <property name="eclipselink.ddl-generation" value="drop-and-create-tables"/> --> |
|
| 41 |
<!-- <property name="eclipselink.ddl-generation.output-mode" value="database"/> --> |
|
| 42 |
</properties> |
|
| 43 |
|
|
| 44 |
</persistence-unit> |
|
| 45 |
</persistence> |
|
| 0 | 46 | |
| tmp/org.txm.annotation.kr.core/META-INF/MANIFEST.MF (revision 850) | ||
|---|---|---|
| 1 |
Manifest-Version: 1.0 |
|
| 2 |
Require-Bundle: org.txm.core;bundle-version="0.7.0";visibility:=reexport, |
|
| 3 |
org.txm.searchengine.cqp.core;visibility:=reexport, |
|
| 4 |
org.txm.utils;bundle-version="1.0.0";visibility:=reexport, |
|
| 5 |
javax.persistence;bundle-version="2.1.0";visibility:=reexport, |
|
| 6 |
org.eclipse.persistence.asm;bundle-version="3.3.1";visibility:=reexport, |
|
| 7 |
org.eclipse.persistence.jpa;bundle-version="2.6.0";visibility:=reexport, |
|
| 8 |
org.eclipse.persistence.jpa.jpql;bundle-version="2.6.0";visibility:=reexport, |
|
| 9 |
org.eclipse.persistence.antlr;bundle-version="3.2.0";visibility:=reexport, |
|
| 10 |
org.eclipse.osgi;bundle-version="3.10.2";visibility:=reexport, |
|
| 11 |
org.txm.searchengine.core;bundle-version="1.0.0";visibility:=reexport, |
|
| 12 |
org.eclipse.ui.workbench;visibility:=reexport, |
|
| 13 |
org.txm.annotation.core;visibility:=reexport |
|
| 14 |
Export-Package: org.hsqldb, |
|
| 15 |
org.hsqldb.auth, |
|
| 16 |
org.hsqldb.dbinfo, |
|
| 17 |
org.hsqldb.error, |
|
| 18 |
org.hsqldb.index, |
|
| 19 |
org.hsqldb.jdbc, |
|
| 20 |
org.hsqldb.jdbc.pool, |
|
| 21 |
org.hsqldb.lib, |
|
| 22 |
org.hsqldb.lib.java, |
|
| 23 |
org.hsqldb.lib.tar, |
|
| 24 |
org.hsqldb.map, |
|
| 25 |
org.hsqldb.navigator, |
|
| 26 |
org.hsqldb.persist, |
|
| 27 |
org.hsqldb.resources, |
|
| 28 |
org.hsqldb.result, |
|
| 29 |
org.hsqldb.rights, |
|
| 30 |
org.hsqldb.rowio, |
|
| 31 |
org.hsqldb.scriptio, |
|
| 32 |
org.hsqldb.server, |
|
| 33 |
org.hsqldb.types, |
|
| 34 |
org.hsqldb.util, |
|
| 35 |
org.postgresql, |
|
| 36 |
org.postgresql.copy, |
|
| 37 |
org.postgresql.core, |
|
| 38 |
org.postgresql.core.v2, |
|
| 39 |
org.postgresql.core.v3, |
|
| 40 |
org.postgresql.ds, |
|
| 41 |
org.postgresql.ds.common, |
|
| 42 |
org.postgresql.fastpath, |
|
| 43 |
org.postgresql.geometric, |
|
| 44 |
org.postgresql.gss, |
|
| 45 |
org.postgresql.hostchooser, |
|
| 46 |
org.postgresql.jdbc, |
|
| 47 |
org.postgresql.jdbc2, |
|
| 48 |
org.postgresql.jdbc2.optional, |
|
| 49 |
org.postgresql.jdbc3, |
|
| 50 |
org.postgresql.largeobject, |
|
| 51 |
org.postgresql.osgi, |
|
| 52 |
org.postgresql.ssl, |
|
| 53 |
org.postgresql.ssl.jdbc4, |
|
| 54 |
org.postgresql.sspi, |
|
| 55 |
org.postgresql.translation, |
|
| 56 |
org.postgresql.util, |
|
| 57 |
org.postgresql.xa, |
|
| 58 |
org.sqlite, |
|
| 59 |
org.sqlite.core, |
|
| 60 |
org.sqlite.date, |
|
| 61 |
org.sqlite.javax, |
|
| 62 |
org.sqlite.jdbc3, |
|
| 63 |
org.sqlite.jdbc4, |
|
| 64 |
org.sqlite.util, |
|
| 65 |
org.txm.annotation.kr.core, |
|
| 66 |
org.txm.annotation.kr.core.conversion, |
|
| 67 |
org.txm.annotation.kr.core.preferences, |
|
| 68 |
org.txm.annotation.kr.core.repository, |
|
| 69 |
org.txm.annotation.kr.core.storage.temporary, |
|
| 70 |
org.txm.annotation.kr.core.temporary |
|
| 71 |
Bundle-ActivationPolicy: lazy |
|
| 72 |
Bundle-ClassPath: .,libs/hsqldb.jar,libs/postgresql-9.4.1207.jre6.jar, |
|
| 73 |
libs/sqlite-jdbc-3.8.11.2.jar |
|
| 74 |
Bundle-Version: 1.0.0.qualifier |
|
| 75 |
Bundle-Name: org.txm.annotation.kr.core |
|
| 76 |
Bundle-ManifestVersion: 2 |
|
| 77 |
Bundle-Activator: org.txm.annotation.kr.core.Activator |
|
| 78 |
Bundle-SymbolicName: org.txm.annotation.kr.core;singleton:=true |
|
| 79 |
Meta-Persistence: META-INF/persistence.xml |
|
| 80 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.7 |
|
| 81 |
|
|
| 0 | 82 | |
| tmp/org.txm.annotation.kr.core/.project (revision 850) | ||
|---|---|---|
| 1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
| 2 |
<projectDescription> |
|
| 3 |
<name>org.txm.annotation.kr.core</name> |
|
| 4 |
<comment></comment> |
|
| 5 |
<projects> |
|
| 6 |
</projects> |
|
| 7 |
<buildSpec> |
|
| 8 |
<buildCommand> |
|
| 9 |
<name>org.eclipse.jdt.core.javabuilder</name> |
|
| 10 |
<arguments> |
|
| 11 |
</arguments> |
|
| 12 |
</buildCommand> |
|
| 13 |
<buildCommand> |
|
| 14 |
<name>org.eclipse.pde.ManifestBuilder</name> |
|
| 15 |
<arguments> |
|
| 16 |
</arguments> |
|
| 17 |
</buildCommand> |
|
| 18 |
<buildCommand> |
|
| 19 |
<name>org.eclipse.pde.SchemaBuilder</name> |
|
| 20 |
<arguments> |
|
| 21 |
</arguments> |
|
| 22 |
</buildCommand> |
|
| 23 |
</buildSpec> |
|
| 24 |
<natures> |
|
| 25 |
<nature>org.eclipse.pde.PluginNature</nature> |
|
| 26 |
<nature>org.eclipse.jdt.core.javanature</nature> |
|
| 27 |
</natures> |
|
| 28 |
</projectDescription> |
|
| 0 | 29 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/AllTests.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core.temporary; |
|
| 2 |
|
|
| 3 |
import org.junit.runner.RunWith; |
|
| 4 |
import org.junit.runners.Suite; |
|
| 5 |
import org.junit.runners.Suite.SuiteClasses; |
|
| 6 |
import org.txm.StartToolbox; |
|
| 7 |
|
|
| 8 |
@RunWith(Suite.class) |
|
| 9 |
@SuiteClasses({ StartToolbox.class, CreateAnnotation.class, DeleteAnnotation.class,
|
|
| 10 |
UpdateAnnotation.class }) |
|
| 11 |
public class AllTests {
|
|
| 12 |
|
|
| 13 |
} |
|
| 0 | 14 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/DeleteAnnotation.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core.temporary; |
|
| 2 |
|
|
| 3 |
import static org.junit.Assert.*; |
|
| 4 |
|
|
| 5 |
import org.junit.Test; |
|
| 6 |
|
|
| 7 |
public class DeleteAnnotation {
|
|
| 8 |
|
|
| 9 |
@Test |
|
| 10 |
public void test() {
|
|
| 11 |
fail("Not yet implemented");
|
|
| 12 |
} |
|
| 13 |
|
|
| 14 |
} |
|
| 0 | 15 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/UpdateAnnotation.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core.temporary; |
|
| 2 |
|
|
| 3 |
import static org.junit.Assert.*; |
|
| 4 |
|
|
| 5 |
import org.junit.Test; |
|
| 6 |
|
|
| 7 |
public class UpdateAnnotation {
|
|
| 8 |
|
|
| 9 |
@Test |
|
| 10 |
public void test() {
|
|
| 11 |
fail("Not yet implemented");
|
|
| 12 |
} |
|
| 13 |
|
|
| 14 |
} |
|
| 0 | 15 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/CreateAnnotation.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core.temporary; |
|
| 2 |
|
|
| 3 |
import static org.junit.Assert.*; |
|
| 4 |
|
|
| 5 |
import java.util.HashMap; |
|
| 6 |
|
|
| 7 |
import org.junit.Test; |
|
| 8 |
import org.txm.Toolbox; |
|
| 9 |
import org.txm.annotation.kr.core.storage.temporary.TemporaryAnnotationManager; |
|
| 10 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
| 11 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
|
| 12 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
|
| 13 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 14 |
|
|
| 15 |
public class CreateAnnotation {
|
|
| 16 |
|
|
| 17 |
@Test |
|
| 18 |
public void test() throws CqiClientException, InvalidCqpIdException {
|
|
| 19 |
if (!Toolbox.isInitialized()) fail("Toolbox not initialized.");
|
|
| 20 |
MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus("VOEUX");
|
|
| 21 |
if (corpus == null) fail("Corpus Voeux not loaded.");
|
|
| 22 |
|
|
| 23 |
HashMap<String, Object> properties = TemporaryAnnotationManager.getInitialisationProperties(this.getClass(), corpus); |
|
| 24 |
properties.put("eclipselink.persistencexml", System.getProperty("user.home")+"/workspace442/org.txm.core/META-INF/persistence.xml");
|
|
| 25 |
|
|
| 26 |
TemporaryAnnotationManager tam = new TemporaryAnnotationManager(corpus, properties); |
|
| 27 |
System.out.println(tam); |
|
| 28 |
} |
|
| 29 |
|
|
| 30 |
} |
|
| 0 | 31 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationComparator.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core; |
|
| 2 |
|
|
| 3 |
import java.text.DateFormat; |
|
| 4 |
import java.text.SimpleDateFormat; |
|
| 5 |
import java.util.Comparator; |
|
| 6 |
import java.util.StringTokenizer; |
|
| 7 |
|
|
| 8 |
public class AnnotationComparator implements Comparator<Annotation> {
|
|
| 9 |
|
|
| 10 |
public int compare(Annotation a1, Annotation a2) {
|
|
| 11 |
// comparer e1 et e2 |
|
| 12 |
|
|
| 13 |
DateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd");
|
|
| 14 |
//System.out.println("Date 1 : "+a1.getDate() + " => "+dateformat.format(a1.getDate()));
|
|
| 15 |
//System.out.println("Date 2 : "+a2.getDate() + " => "+dateformat.format(a2.getDate()));
|
|
| 16 |
StringTokenizer tokenizer1 = new StringTokenizer(dateformat.format(a1.getDate()), "-"); |
|
| 17 |
StringTokenizer tokenizer2 = new StringTokenizer(dateformat.format(a2.getDate()), "-"); |
|
| 18 |
|
|
| 19 |
for(int i = 0 ; i < 2 && tokenizer1.hasMoreTokens() && tokenizer2.hasMoreTokens() ; ++i) {
|
|
| 20 |
String token1 = tokenizer1.nextToken(); |
|
| 21 |
String token2 = tokenizer2.nextToken(); |
|
| 22 |
int valint1 = new Integer(token1).intValue(); |
|
| 23 |
int valint2 = new Integer(token2).intValue(); |
|
| 24 |
//System.out.println(valint1+" | "+valint2); |
|
| 25 |
if (valint1<valint2){
|
|
| 26 |
return -1; |
|
| 27 |
}else {
|
|
| 28 |
if(valint1>valint2){
|
|
| 29 |
return 1; |
|
| 30 |
}else {
|
|
| 31 |
//System.out.println("idem ["+i+"]");
|
|
| 32 |
} |
|
| 33 |
|
|
| 34 |
} |
|
| 35 |
} |
|
| 36 |
|
|
| 37 |
return 0; |
|
| 38 |
|
|
| 39 |
} |
|
| 40 |
|
|
| 41 |
public static void main(String[] args) {
|
|
| 42 |
AnnotationComparator comp = new AnnotationComparator(); |
|
| 43 |
Annotation a1 = new Annotation("truc", "bidule", 3, 10);
|
|
| 44 |
Annotation a2 = new Annotation("truc", "bidule", 6, 8);
|
|
| 45 |
int ret = comp.compare(a1, a2); |
|
| 46 |
System.out.println("Le retour est : "+ret);
|
|
| 47 |
} |
|
| 48 |
} |
|
| 0 | 49 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/DatabasePersistenceManager.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core; |
|
| 2 |
|
|
| 3 |
import java.util.HashMap; |
|
| 4 |
|
|
| 5 |
import javax.persistence.EntityManager; |
|
| 6 |
|
|
| 7 |
public class DatabasePersistenceManager {
|
|
| 8 |
|
|
| 9 |
//Object can be Corpus or KnowledgeRepository |
|
| 10 |
protected HashMap<Object, EntityManager> managers; |
|
| 11 |
public static final String PERSISTENCE_UNIT_NAME = "HSQLKRPERSISTENCE"; |
|
| 12 |
public static String ACCESS_SQL = "sql"; |
|
| 13 |
public static String ACCESS_FILE = "file"; |
|
| 14 |
public static String ACCESS_SPARQL = "sparql"; |
|
| 15 |
|
|
| 16 |
/** |
|
| 17 |
* Instantiates a new database manager. |
|
| 18 |
*/ |
|
| 19 |
public DatabasePersistenceManager() {
|
|
| 20 |
managers = new HashMap<Object, EntityManager>() ; |
|
| 21 |
} |
|
| 22 |
|
|
| 23 |
/** |
|
| 24 |
* The Object can be a Corpus or a KnowledgeRepository |
|
| 25 |
* @param obj |
|
| 26 |
* @return |
|
| 27 |
*/ |
|
| 28 |
public EntityManager getJPAEntityManager(Object obj){
|
|
| 29 |
if (managers.containsKey(obj)) {
|
|
| 30 |
return managers.get(obj); |
|
| 31 |
} |
|
| 32 |
return null; |
|
| 33 |
} |
|
| 34 |
|
|
| 35 |
public void closeManager(Object key) {
|
|
| 36 |
if (!managers.keySet().contains(key)) return; |
|
| 37 |
|
|
| 38 |
managers.get(key).close(); |
|
| 39 |
managers.remove(key); |
|
| 40 |
} |
|
| 41 |
|
|
| 42 |
public void closeAllManagers() {
|
|
| 43 |
for (Object key : managers.keySet()) {
|
|
| 44 |
EntityManager m = managers.get(key); |
|
| 45 |
m.flush(); |
|
| 46 |
m.createNativeQuery("SHUTDOWN;");
|
|
| 47 |
m.close(); |
|
| 48 |
} |
|
| 49 |
managers.clear(); |
|
| 50 |
} |
|
| 51 |
} |
|
| 0 | 52 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationManager.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.util.ArrayList; |
|
| 5 |
import java.util.Arrays; |
|
| 6 |
import java.util.HashMap; |
|
| 7 |
import java.util.List; |
|
| 8 |
|
|
| 9 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
| 10 |
import org.txm.annotation.kr.core.repository.AnnotationEffect; |
|
| 11 |
import org.txm.annotation.kr.core.repository.AnnotationType; |
|
| 12 |
import org.txm.annotation.kr.core.repository.TypedValue; |
|
| 13 |
import org.txm.annotation.kr.core.storage.temporary.TemporaryAnnotationManager; |
|
| 14 |
import org.txm.core.engines.Engine; |
|
| 15 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 16 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
| 17 |
import org.txm.utils.logger.Log; |
|
| 18 |
|
|
| 19 |
/** |
|
| 20 |
* Manage annotations and is able to return annotation saved in JPA |
|
| 21 |
* |
|
| 22 |
* @author mdecorde |
|
| 23 |
* |
|
| 24 |
*/ |
|
| 25 |
public class AnnotationManager implements Engine {
|
|
| 26 |
|
|
| 27 |
MainCorpus corpus; |
|
| 28 |
TemporaryAnnotationManager tempManager; |
|
| 29 |
CQPAnnotationManager cqpManager; |
|
| 30 |
boolean dirty = false; |
|
| 31 |
|
|
| 32 |
public AnnotationManager(MainCorpus mainCorpus){
|
|
| 33 |
this.corpus = mainCorpus; |
|
| 34 |
} |
|
| 35 |
|
|
| 36 |
public TemporaryAnnotationManager getTemporaryManager(){
|
|
| 37 |
return tempManager; |
|
| 38 |
} |
|
| 39 |
|
|
| 40 |
public CQPAnnotationManager getCQPManager(){
|
|
| 41 |
return cqpManager; |
|
| 42 |
} |
|
| 43 |
|
|
| 44 |
public boolean saveAnnotations() throws Exception{
|
|
| 45 |
List<Annotation> annots = tempManager.getAnnotations(); |
|
| 46 |
if (annots.isEmpty()) {
|
|
| 47 |
System.out.println("No annotation to save. Aborting.");
|
|
| 48 |
dirty = false; |
|
| 49 |
return true; |
|
| 50 |
} |
|
| 51 |
|
|
| 52 |
AnnotationWriter writer = new AnnotationWriter(corpus); |
|
| 53 |
if (writer.writeAnnotations(annots)) {
|
|
| 54 |
Log.info("Annotations succesfully written. Deleting temporary annotations...");
|
|
| 55 |
tempManager.deleteAnnotations(); |
|
| 56 |
dirty = false; |
|
| 57 |
return true; |
|
| 58 |
} |
|
| 59 |
return false; |
|
| 60 |
} |
|
| 61 |
|
|
| 62 |
public boolean exportAnnotationsToSyMoGIH(File resultZipFile) throws Exception{
|
|
| 63 |
AnnotationWriter writer = new AnnotationWriter(corpus); |
|
| 64 |
|
|
| 65 |
if (writer.writeAnnotationsInStandoff(resultZipFile)) {
|
|
| 66 |
Log.info("Annotations succesfully written in "+resultZipFile);
|
|
| 67 |
return true; |
|
| 68 |
} |
|
| 69 |
return false; |
|
| 70 |
} |
|
| 71 |
|
|
| 72 |
//TODO: not ended? |
|
| 73 |
/** |
|
| 74 |
* Deletes the annotations stored in the temporary annotation manager |
|
| 75 |
* @param type |
|
| 76 |
* @param job |
|
| 77 |
* @return |
|
| 78 |
* @throws Exception |
|
| 79 |
*/ |
|
| 80 |
public boolean deleteAnnotations(AnnotationType type, IProgressMonitor job) throws Exception {
|
|
| 81 |
List<Annotation> temporaryAnnotations = null; |
|
| 82 |
List<Annotation> cqpAnnotations = null; |
|
| 83 |
try {
|
|
| 84 |
temporaryAnnotations = tempManager.getAnnotations(type); |
|
| 85 |
tempManager.getEntityManager().getTransaction().begin(); |
|
| 86 |
for (Annotation a : temporaryAnnotations){
|
|
| 87 |
if (job != null && job.isCanceled()) {
|
|
| 88 |
System.out.println("Delete annotation canceled.");
|
|
| 89 |
return false; |
|
| 90 |
} |
|
| 91 |
tempManager.deleteAnnotation(type, a.getStart(), a.getEnd()); |
|
| 92 |
} |
|
| 93 |
tempManager.getEntityManager().getTransaction().commit(); |
|
| 94 |
|
|
| 95 |
cqpAnnotations = cqpManager.getAnnotations(type); |
|
| 96 |
tempManager.getEntityManager().getTransaction().begin(); |
|
| 97 |
for (Annotation a : cqpAnnotations) {
|
|
| 98 |
if (job != null && job.isCanceled()) {
|
|
| 99 |
System.out.println("Delete annotation canceled.");
|
|
| 100 |
return false; |
|
| 101 |
} |
|
| 102 |
String value = cqpManager.getCQPAnnotationValue(a.getStart(), a.getEnd(), type); |
|
| 103 |
if (value != null) {
|
|
| 104 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), a.getStart(), a.getEnd());
|
|
| 105 |
} else {
|
|
| 106 |
tempManager.deleteAnnotationNoCommit(type, a.getStart(), a.getEnd()); |
|
| 107 |
} |
|
| 108 |
} |
|
| 109 |
dirty = true; |
|
| 110 |
tempManager.getEntityManager().getTransaction().commit(); |
|
| 111 |
} catch(Exception e) {
|
|
| 112 |
e.printStackTrace(); |
|
| 113 |
return false; |
|
| 114 |
} |
|
| 115 |
return true; |
|
| 116 |
} |
|
| 117 |
|
|
| 118 |
public boolean deleteAnnotations(AnnotationType type, List<Match> matches, IProgressMonitor job) throws Exception {
|
|
| 119 |
try {
|
|
| 120 |
tempManager.getEntityManager().getTransaction().begin(); |
|
| 121 |
for (Match m : matches) {
|
|
| 122 |
if (job != null && job.isCanceled()) {
|
|
| 123 |
System.out.println("Delete annotation canceled.");
|
|
| 124 |
return false; |
|
| 125 |
} |
|
| 126 |
|
|
| 127 |
String value = cqpManager.getCQPAnnotationValue(m.getStart(), m.getEnd(), type); |
|
| 128 |
|
|
| 129 |
if (value != null) {
|
|
| 130 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), m.getStart(), m.getEnd());
|
|
| 131 |
} else {
|
|
| 132 |
tempManager.deleteAnnotationNoCommit(type, m.getStart(), m.getEnd()); |
|
| 133 |
} |
|
| 134 |
} |
|
| 135 |
dirty = true; |
|
| 136 |
tempManager.getEntityManager().getTransaction().commit(); |
|
| 137 |
} catch (Exception e) {
|
|
| 138 |
e.printStackTrace(); |
|
| 139 |
return false; |
|
| 140 |
} |
|
| 141 |
return true; |
|
| 142 |
} |
|
| 143 |
|
|
| 144 |
/** |
|
| 145 |
* Returns the annotation saved in the temporary database and in the CQP corpus indexes |
|
| 146 |
* |
|
| 147 |
* CQP Annotations must be shadowed by temporary annotations of the same type and positions |
|
| 148 |
*/ |
|
| 149 |
public List<Annotation> getAnnotationsForMatches(AnnotationType type, List<Match> matches, boolean overlap) {
|
|
| 150 |
List<Annotation> temporaryAnnotations = null; |
|
| 151 |
List<Annotation> resultAnnotations = new ArrayList<Annotation>(); |
|
| 152 |
try {
|
|
| 153 |
temporaryAnnotations = tempManager.getAnnotations(type, matches, null, false, overlap); |
|
| 154 |
temporaryAnnotations = tempManager.getAnnotationsForMatches(matches, temporaryAnnotations, overlap); |
|
| 155 |
|
|
| 156 |
List<Annotation> cqpAnnotations = cqpManager.getAnnotationsForMatches(matches, type, overlap); |
|
| 157 |
|
|
| 158 |
// System.out.println("Temporary annotations: "+temporaryAnnotations);
|
|
| 159 |
// System.out.println("CQP annotations: "+cqpAnnotations);
|
|
| 160 |
if (cqpAnnotations.size() != matches.size() || temporaryAnnotations.size() != matches.size()) {
|
|
| 161 |
System.out.println("ERROR in getAnnotationsForMatches methods! ");
|
|
| 162 |
return new ArrayList<Annotation>(matches.size()); |
|
| 163 |
} |
|
| 164 |
// merge the 2 results |
|
| 165 |
for (int i = 0 ; i < matches.size() ; i++) {
|
|
| 166 |
if (cqpAnnotations.get(i) == null && temporaryAnnotations.get(i) == null) {
|
|
| 167 |
resultAnnotations.add(null); |
|
| 168 |
} else if (temporaryAnnotations.get(i) != null) {
|
|
| 169 |
resultAnnotations.add(temporaryAnnotations.get(i)); |
|
| 170 |
} else if (cqpAnnotations.get(i) != null) {
|
|
| 171 |
resultAnnotations.add(cqpAnnotations.get(i)); |
|
| 172 |
} else {
|
|
| 173 |
resultAnnotations.add(null); |
|
| 174 |
} |
|
| 175 |
} |
|
| 176 |
} catch (Exception e) {
|
|
| 177 |
e.printStackTrace(); |
|
| 178 |
return new ArrayList<Annotation>(matches.size()); |
|
| 179 |
} |
|
| 180 |
return resultAnnotations; |
|
| 181 |
} |
|
| 182 |
|
|
| 183 |
public void clearInstance() {
|
|
| 184 |
try {
|
|
| 185 |
tempManager.close(); |
|
| 186 |
} catch(Exception e) {
|
|
| 187 |
System.out.println("Fail to clear AnnotationManager instance: "+e);
|
|
| 188 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
| 189 |
} |
|
| 190 |
} |
|
| 191 |
|
|
| 192 |
public void checkData() {
|
|
| 193 |
try {
|
|
| 194 |
tempManager.checkData(); |
|
| 195 |
} catch (Exception e) {
|
|
| 196 |
// TODO Auto-generated catch block |
|
| 197 |
e.printStackTrace(); |
|
| 198 |
} |
|
| 199 |
} |
|
| 200 |
|
|
| 201 |
public boolean hasChanges() {
|
|
| 202 |
return tempManager.hasChanges(); |
|
| 203 |
} |
|
| 204 |
|
|
| 205 |
/** |
|
| 206 |
* |
|
| 207 |
* @param annotSelectedType not null |
|
| 208 |
* @param annotSelectedTypedValue not null |
|
| 209 |
* @param matches not null |
|
| 210 |
* @param job may be null |
|
| 211 |
* @return |
|
| 212 |
*/ |
|
| 213 |
public HashMap<Match,List<Annotation>> createAnnotations(AnnotationType annotSelectedType, |
|
| 214 |
TypedValue annotSelectedTypedValue, List<Match> matches, IProgressMonitor job) {
|
|
| 215 |
|
|
| 216 |
HashMap<Match,List<Annotation>> allAnnotationsThatCollides = new HashMap<Match, List<Annotation>>(); |
|
| 217 |
tempManager.getEntityManager().getTransaction().begin(); // warning |
|
| 218 |
for (Match match : matches) {
|
|
| 219 |
allAnnotationsThatCollides.put(match, new ArrayList<Annotation>()); |
|
| 220 |
|
|
| 221 |
if (job != null && job.isCanceled()) { // check if user canceled the job
|
|
| 222 |
System.out.println("Affect annotation canceled.");
|
|
| 223 |
return null; |
|
| 224 |
} |
|
| 225 |
|
|
| 226 |
try {
|
|
| 227 |
List<Annotation> cqpAnnotations = null; |
|
| 228 |
if (annotSelectedType.getEffect().equals(AnnotationEffect.SEGMENT)) {
|
|
| 229 |
cqpAnnotations = cqpManager.getAnnotations(null, match, null, true); // get all annotations |
|
| 230 |
// remove A)the wrapping annotations and B) the annotation with same type and same positions |
|
| 231 |
for (int i = 0 ; i < cqpAnnotations.size() ; i++) {
|
|
| 232 |
Annotation a = cqpAnnotations.get(i); |
|
| 233 |
|
|
| 234 |
// exact match + exact type |
|
| 235 |
if (a.getType().equals(annotSelectedType.getId()) && a.getStart() == match.getStart() && a.getEnd() == match.getEnd()) {
|
|
| 236 |
cqpAnnotations.remove(i); |
|
| 237 |
i--; |
|
| 238 |
} else if (!a.getType().equals(annotSelectedType.getId()) && (// different type and inner or outer wrap |
|
| 239 |
(a.getStart() <= match.getStart() && match.getEnd() <= a.getEnd()) || |
|
| 240 |
(match.getStart() <= a.getStart() && a.getEnd() <= match.getEnd()) |
|
| 241 |
)) {
|
|
| 242 |
cqpAnnotations.remove(i); |
|
| 243 |
i--; |
|
| 244 |
} |
|
| 245 |
} |
|
| 246 |
} else {
|
|
| 247 |
// no need to test collision (AnnotationType=TOKEN) |
|
| 248 |
cqpAnnotations = new ArrayList<Annotation>(); |
|
| 249 |
} |
|
| 250 |
|
|
| 251 |
if (cqpAnnotations.size() > 0) {
|
|
| 252 |
allAnnotationsThatCollides.get(match).addAll(cqpAnnotations); |
|
| 253 |
continue; // don't create annotation, process next match |
|
| 254 |
} else { // test with temporary annotation manager
|
|
| 255 |
List<Annotation> tempAnnotations = null; |
|
| 256 |
if (match.getTarget() >= 0) { // the @ operator has been used, annotate only the @position
|
|
| 257 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, match.getTarget(), match.getTarget()); |
|
| 258 |
} else {
|
|
| 259 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, match.getStart(), match.getEnd()); |
|
| 260 |
} |
|
| 261 |
if (tempAnnotations.size() > 0) |
|
| 262 |
allAnnotationsThatCollides.get(match).addAll(tempAnnotations); |
|
| 263 |
} |
|
| 264 |
} catch (Exception e) {
|
|
| 265 |
Log.printStackTrace(e); |
|
| 266 |
System.out.println("Error during annotation creation: "+e);
|
|
| 267 |
} |
|
| 268 |
|
|
| 269 |
if (allAnnotationsThatCollides.get(match).size() == 0) allAnnotationsThatCollides.remove(match); // keep only colision lists |
|
| 270 |
} |
|
| 271 |
dirty = true; |
|
| 272 |
tempManager.getEntityManager().getTransaction().commit(); // warning |
|
| 273 |
// test if there are CQP annotations for the |
|
| 274 |
return allAnnotationsThatCollides; |
|
| 275 |
} |
|
| 276 |
|
|
| 277 |
public List<Annotation> getAnnotations(AnnotationType type, int start, int end, boolean overlap) {
|
|
| 278 |
List<Annotation> temporaryAnnotations = null; |
|
| 279 |
List<Annotation> cqpAnnotations = null; |
|
| 280 |
try {
|
|
| 281 |
temporaryAnnotations = tempManager.getAnnotations(type, Arrays.asList(new Match(start, end)), null, false, overlap); |
|
| 282 |
cqpAnnotations = cqpManager.getAnnotations(type, start, end, overlap); |
|
| 283 |
|
|
| 284 |
int i = 0; |
|
| 285 |
for (Annotation a : cqpAnnotations) {
|
|
| 286 |
while (temporaryAnnotations.get(i).getStart() < a.getStart()) {
|
|
| 287 |
i++; |
|
| 288 |
} |
|
| 289 |
temporaryAnnotations.add(i, a); |
|
| 290 |
} |
|
| 291 |
} catch(Exception e) {
|
|
| 292 |
|
|
| 293 |
} |
|
| 294 |
return temporaryAnnotations; |
|
| 295 |
} |
|
| 296 |
|
|
| 297 |
public List<Annotation> getAnnotations(AnnotationType type, int i, int j) {
|
|
| 298 |
return getAnnotations(type, i, j, false); |
|
| 299 |
} |
|
| 300 |
|
|
| 301 |
public void closeAll() {
|
|
| 302 |
Log.info("Closing annotation manager of "+corpus);
|
|
| 303 |
tempManager.close(); |
|
| 304 |
cqpManager.close(); |
|
| 305 |
} |
|
| 306 |
|
|
| 307 |
public boolean isOpen() {
|
|
| 308 |
return tempManager.getEntityManager() != null && tempManager.getEntityManager().isOpen(); |
|
| 309 |
} |
|
| 310 |
|
|
| 311 |
@Override |
|
| 312 |
public String getName() {
|
|
| 313 |
return "Annotation"; |
|
| 314 |
} |
|
| 315 |
|
|
| 316 |
@Override |
|
| 317 |
public boolean getState() {
|
|
| 318 |
return isOpen(); |
|
| 319 |
} |
|
| 320 |
|
|
| 321 |
@Override |
|
| 322 |
public boolean initialize() throws Exception {
|
|
| 323 |
tempManager = new TemporaryAnnotationManager(corpus); |
|
| 324 |
dirty = tempManager.getAnnotations().size() > 0; |
|
| 325 |
cqpManager = new CQPAnnotationManager(corpus); |
|
| 326 |
return false; |
|
| 327 |
} |
|
| 328 |
|
|
| 329 |
@Override |
|
| 330 |
public boolean start(IProgressMonitor arg0) throws Exception {
|
|
| 331 |
return true; |
|
| 332 |
} |
|
| 333 |
|
|
| 334 |
@Override |
|
| 335 |
public boolean stop() throws Exception {
|
|
| 336 |
return true; |
|
| 337 |
} |
|
| 338 |
|
|
| 339 |
/** |
|
| 340 |
* |
|
| 341 |
* @return |
|
| 342 |
*/ |
|
| 343 |
public boolean isDirty() {
|
|
| 344 |
return dirty; |
|
| 345 |
} |
|
| 346 |
} |
|
| 0 | 347 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationPK.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core; |
|
| 2 |
|
|
| 3 |
import java.io.Serializable; |
|
| 4 |
|
|
| 5 |
import javax.persistence.*; |
|
| 6 |
|
|
| 7 |
@Embeddable |
|
| 8 |
public class AnnotationPK implements Serializable {
|
|
| 9 |
|
|
| 10 |
private static final long serialVersionUID = -2360693333015275209L; |
|
| 11 |
|
|
| 12 |
//corresponding to the start and end positions (in the corpus) |
|
| 13 |
private int startpos; |
|
| 14 |
private int endpos; |
|
| 15 |
private String refType; |
|
| 16 |
|
|
| 17 |
public AnnotationPK() {
|
|
| 18 |
} |
|
| 19 |
|
|
| 20 |
public AnnotationPK(int start, int end, String refType) {
|
|
| 21 |
this.startpos = start; |
|
| 22 |
this.endpos = end; |
|
| 23 |
this.refType = refType; |
|
| 24 |
} |
|
| 25 |
|
|
| 26 |
public void setRefType(String refType) {
|
|
| 27 |
this.refType = refType; |
|
| 28 |
} |
|
| 29 |
|
|
| 30 |
public void setStartPosition(int start) {
|
|
| 31 |
this.startpos = start; |
|
| 32 |
} |
|
| 33 |
|
|
| 34 |
public void setEndPosition(int end) {
|
|
| 35 |
this.endpos = end; |
|
| 36 |
} |
|
| 37 |
|
|
| 38 |
public String getRefType() {
|
|
| 39 |
return refType; |
|
| 40 |
} |
|
| 41 |
|
|
| 42 |
public int getStartPosition() {
|
|
| 43 |
return startpos; |
|
| 44 |
} |
|
| 45 |
|
|
| 46 |
public int getEndPosition() {
|
|
| 47 |
return endpos; |
|
| 48 |
} |
|
| 49 |
|
|
| 50 |
public boolean equals(Object obj) {
|
|
| 51 |
if (obj == null) return false; |
|
| 52 |
if (obj == this) return true; |
|
| 53 |
if (!(obj instanceof AnnotationPK)) return false; |
|
| 54 |
|
|
| 55 |
AnnotationPK other = (AnnotationPK) obj; |
|
| 56 |
return startpos == other.startpos && endpos == other.endpos&& refType.equals(other.refType); |
|
| 57 |
} |
|
| 58 |
|
|
| 59 |
public int hashCode() {
|
|
| 60 |
return refType.hashCode()+startpos+endpos; |
|
| 61 |
} |
|
| 62 |
|
|
| 63 |
public String toString() {
|
|
| 64 |
return getRefType() + "["+getStartPosition()+"-"+getEndPosition()+"]" ; |
|
| 65 |
} |
|
| 66 |
} |
|
| 0 | 67 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/CorpusRuledConvertion.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core.conversion; |
|
| 2 |
|
|
| 3 |
import java.io.BufferedReader; |
|
| 4 |
import java.io.File; |
|
| 5 |
import java.io.IOException; |
|
| 6 |
import java.util.HashSet; |
|
| 7 |
import java.util.LinkedHashMap; |
|
| 8 |
import java.util.regex.Pattern; |
|
| 9 |
|
|
| 10 |
import javax.xml.stream.XMLStreamException; |
|
| 11 |
|
|
| 12 |
import org.apache.commons.lang.StringUtils; |
|
| 13 |
import org.txm.Toolbox; |
|
| 14 |
import org.txm.core.preferences.TBXPreferences; |
|
| 15 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 16 |
import org.txm.stat.utils.ConsoleProgressBar; |
|
| 17 |
import org.txm.utils.io.IOUtils; |
|
| 18 |
|
|
| 19 |
public class CorpusRuledConvertion {
|
|
| 20 |
|
|
| 21 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
| 22 |
private String oldType; |
|
| 23 |
private String newType; |
|
| 24 |
|
|
| 25 |
public CorpusRuledConvertion(File conversionFile, String oldType, String newType) throws IOException {
|
|
| 26 |
this.oldType = oldType; |
|
| 27 |
this.newType = newType; |
|
| 28 |
|
|
| 29 |
BufferedReader reader = IOUtils.getReader(conversionFile); |
|
| 30 |
String line = reader.readLine(); |
|
| 31 |
while (line != null) {
|
|
| 32 |
int idx = line.indexOf("\t");
|
|
| 33 |
if (idx > 0) {
|
|
| 34 |
String k = line.substring(0, idx); |
|
| 35 |
String v = line.substring(idx +1); |
|
| 36 |
rules.put(Pattern.compile(k), v); |
|
| 37 |
} |
|
| 38 |
line = reader.readLine(); |
|
| 39 |
} |
|
| 40 |
|
|
| 41 |
System.out.println("Conversion rules: "+rules);
|
|
| 42 |
} |
|
| 43 |
|
|
| 44 |
public CorpusRuledConvertion(LinkedHashMap<Pattern, String> rules, |
|
| 45 |
String oldType, String newType) {
|
|
| 46 |
this.oldType = oldType; |
|
| 47 |
this.newType = newType; |
|
| 48 |
|
|
| 49 |
this.rules = rules; |
|
| 50 |
} |
|
| 51 |
|
|
| 52 |
public boolean process(MainCorpus corpus) throws XMLStreamException, IOException {
|
|
| 53 |
File binaryCorpusDirectory = corpus.getBaseDirectory(); |
|
| 54 |
File txmDirectory = new File(binaryCorpusDirectory, "txm"); |
|
| 55 |
if (!txmDirectory.exists()) {
|
|
| 56 |
System.out.println("'txm' directory not found in "+binaryCorpusDirectory.getAbsolutePath());
|
|
| 57 |
return false; |
|
| 58 |
} |
|
| 59 |
File txmCorpusDirectory = new File(txmDirectory, corpus.getName()); |
|
| 60 |
if (!txmCorpusDirectory.exists()) {
|
|
| 61 |
System.out.println("'"+corpus.getName()+"' corpus directory not found in "+txmDirectory.getAbsolutePath());
|
|
| 62 |
return false; |
|
| 63 |
} |
|
| 64 |
File[] files = txmCorpusDirectory.listFiles(); |
|
| 65 |
if (files == null || files.length == 0) {
|
|
| 66 |
System.out.println("No file in "+txmCorpusDirectory);
|
|
| 67 |
return false; |
|
| 68 |
} |
|
| 69 |
ConsoleProgressBar cpb = new ConsoleProgressBar(files.length); |
|
| 70 |
for (File xmlFile : files) {
|
|
| 71 |
cpb.tick(); |
|
| 72 |
if (xmlFile.isDirectory()) continue; |
|
| 73 |
if (xmlFile.isHidden()) continue; |
|
| 74 |
if (!xmlFile.getName().endsWith(".xml")) continue;
|
|
| 75 |
|
|
| 76 |
File tmpFile = new File(xmlFile.getParentFile(), "tmp_"+xmlFile.getName()); |
|
| 77 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, XMLTXMFileRuledConversion.ABANDON); |
|
| 78 |
if (converter.process(tmpFile)) {
|
|
| 79 |
xmlFile.delete(); |
|
| 80 |
// try {
|
|
| 81 |
// FileCopy.copy(tmpFile, new File("/tmp/"+tmpFile.getName()));
|
|
| 82 |
// } catch (IOException e) {
|
|
| 83 |
// // TODO Auto-generated catch block |
|
| 84 |
// e.printStackTrace(); |
|
| 85 |
// } |
|
| 86 |
|
|
| 87 |
HashSet<String> errors = converter.getNoMatchValues(); |
|
| 88 |
if (errors.size() > 0) {
|
|
| 89 |
System.out.println("Some values did not match rule:");
|
|
| 90 |
int i = 0; |
|
| 91 |
for (String error : errors) {
|
|
| 92 |
System.out.println("\t"+error);
|
|
| 93 |
if (i >= 10) break; |
|
| 94 |
} |
|
| 95 |
if (errors.size() > 10) {
|
|
| 96 |
try {
|
|
| 97 |
File errorFile = new File(Toolbox.getTXMHOMEPATH(), "errors.txt"); |
|
| 98 |
IOUtils.write(errorFile, StringUtils.join(errors, "\t")); |
|
| 99 |
System.out.println("More errors, see "+errorFile.getAbsolutePath());
|
|
| 100 |
} catch (Exception e) {
|
|
| 101 |
e.printStackTrace(); |
|
| 102 |
} |
|
| 103 |
} |
|
| 104 |
return false; |
|
| 105 |
} |
|
| 106 |
|
|
| 107 |
tmpFile.renameTo(xmlFile); |
|
| 108 |
if (tmpFile.exists()) {
|
|
| 109 |
System.out.println("Could not replace original file with the result file. "+xmlFile+ " with "+tmpFile);
|
|
| 110 |
return false; |
|
| 111 |
} |
|
| 112 |
} else {
|
|
| 113 |
System.out.println("Fail to process "+xmlFile);
|
|
| 114 |
return false; |
|
| 115 |
} |
|
| 116 |
} |
|
| 117 |
System.out.println(); // end of ConsoleProgressBar |
|
| 118 |
return true; |
|
| 119 |
} |
|
| 120 |
} |
|
| 0 | 121 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/XMLTXMFileRuledConversion.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core.conversion; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.io.IOException; |
|
| 5 |
import java.util.HashSet; |
|
| 6 |
import java.util.LinkedHashMap; |
|
| 7 |
import java.util.regex.Pattern; |
|
| 8 |
|
|
| 9 |
import javax.xml.stream.XMLStreamException; |
|
| 10 |
|
|
| 11 |
import org.txm.importer.StaxIdentityParser; |
|
| 12 |
|
|
| 13 |
public class XMLTXMFileRuledConversion extends StaxIdentityParser {
|
|
| 14 |
|
|
| 15 |
protected LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
| 16 |
protected String oldType; |
|
| 17 |
protected String newType; |
|
| 18 |
|
|
| 19 |
public static final String DELETE = "supprimer"; |
|
| 20 |
public static final String COPY = "copier"; |
|
| 21 |
public static final String ABANDON = "abandon"; |
|
| 22 |
HashSet<String> noMatchValues = new HashSet<String>(); |
|
| 23 |
|
|
| 24 |
public XMLTXMFileRuledConversion(File infile, LinkedHashMap<Pattern, String> rules, String oldType, String newType, String mode) throws IOException, XMLStreamException {
|
|
| 25 |
super(infile); |
|
| 26 |
this.rules = rules; |
|
| 27 |
this.oldType = oldType; |
|
| 28 |
this.newType = newType; |
|
| 29 |
|
|
| 30 |
this.mode = mode; |
|
| 31 |
|
|
| 32 |
if (!this.newType.startsWith("#")) this.newType = "#"+this.newType;
|
|
| 33 |
if (!this.oldType.startsWith("#")) this.oldType = "#"+this.oldType;
|
|
| 34 |
} |
|
| 35 |
|
|
| 36 |
boolean inW = false, inAna = false, inForm; |
|
| 37 |
LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>(); |
|
| 38 |
LinkedHashMap<String, String> anaResps = new LinkedHashMap<String, String>(); |
|
| 39 |
String typeName = null; |
|
| 40 |
String respName = null; |
|
| 41 |
String formValue, typeValue = null; |
|
| 42 |
private Object mode; |
|
| 43 |
|
|
| 44 |
@Override |
|
| 45 |
public void processStartElement() throws XMLStreamException, IOException {
|
|
| 46 |
if (!inW) super.processStartElement(); // don't write W content |
|
| 47 |
|
|
| 48 |
if (localname.equals("w")) {
|
|
| 49 |
inW = true; |
|
| 50 |
anaValues.clear(); |
|
| 51 |
anaResps.clear(); |
|
| 52 |
|
|
| 53 |
//initialize the new type to a empty value in case there is transformation rule |
|
| 54 |
anaValues.put(newType, ""); |
|
| 55 |
anaResps.put(newType, "#txm_recode"); |
|
| 56 |
} else if (localname.equals("ana")) {
|
|
| 57 |
inAna = true; |
|
| 58 |
typeName = parser.getAttributeValue(null, "type"); |
|
| 59 |
respName = parser.getAttributeValue(null, "resp"); |
|
| 60 |
anaResps.put(typeName, respName); |
|
| 61 |
//if (typeName != null) typeName = typeName.substring(1); // remove # |
|
| 62 |
typeValue = ""; |
|
| 63 |
} else if (localname.equals("form")) {
|
|
| 64 |
inForm = true; |
|
| 65 |
formValue = ""; |
|
| 66 |
} |
|
| 67 |
} |
|
| 68 |
|
|
| 69 |
@Override |
|
| 70 |
public void processCharacters() throws XMLStreamException {
|
|
| 71 |
if (inW && inAna) typeValue+=parser.getText(); |
|
| 72 |
else if (inW && inForm) formValue+=parser.getText(); |
|
| 73 |
else super.processCharacters(); |
|
| 74 |
} |
|
| 75 |
|
|
| 76 |
@Override |
|
| 77 |
public void processEndElement() throws XMLStreamException {
|
|
| 78 |
if (localname.equals("w")) {
|
|
| 79 |
inW = false; |
|
| 80 |
|
|
| 81 |
// write W content |
|
| 82 |
try {
|
|
| 83 |
// get the value to test |
|
| 84 |
String value = null; |
|
| 85 |
if (oldType.equals("word")) {
|
|
| 86 |
value = formValue; |
|
| 87 |
} else {
|
|
| 88 |
value = anaValues.get(oldType); |
|
| 89 |
} |
|
| 90 |
|
|
| 91 |
if (newType.equals("word")) { // update form property
|
|
| 92 |
updateFormValueIfMatch(value); |
|
| 93 |
} else { // update another word property
|
|
| 94 |
if (value != null) {
|
|
| 95 |
updateAnaValuesIfMatch(value); |
|
| 96 |
} |
|
| 97 |
} |
|
| 98 |
|
|
| 99 |
// write the word element |
|
| 100 |
writer.writeStartElement("txm:form");
|
|
| 101 |
writer.writeCharacters(formValue); |
|
| 102 |
writer.writeEndElement(); |
|
| 103 |
|
|
| 104 |
for (String k : anaValues.keySet()) {
|
|
| 105 |
String resp = anaResps.get(k); |
|
| 106 |
if (resp == null) resp = "#txm_recode"; |
|
| 107 |
|
|
| 108 |
writer.writeStartElement("txm:ana");
|
|
| 109 |
writer.writeAttribute("resp", resp);
|
|
| 110 |
writer.writeAttribute("type", k);
|
|
| 111 |
writer.writeCharacters(anaValues.get(k)); |
|
| 112 |
writer.writeEndElement(); |
|
| 113 |
} |
|
| 114 |
} catch (XMLStreamException e) {
|
|
| 115 |
e.printStackTrace(); |
|
| 116 |
} |
|
| 117 |
} else if (localname.equals("ana")) {
|
|
| 118 |
anaValues.put(typeName, typeValue); |
|
| 119 |
inAna = false; |
|
| 120 |
} else if (localname.equals("form")) {
|
|
| 121 |
inForm = false; |
|
| 122 |
} |
|
| 123 |
|
|
| 124 |
if (!inW) super.processEndElement(); // don't write W content |
|
| 125 |
} |
|
| 126 |
|
|
| 127 |
protected void updateFormValueIfMatch(String value) {
|
|
| 128 |
for (Pattern rule : rules.keySet()) {
|
|
| 129 |
if (rule.matcher(value).matches()) {
|
|
| 130 |
formValue = rules.get(rule); |
|
| 131 |
return; // ok stop |
|
| 132 |
} |
|
| 133 |
} |
|
| 134 |
|
|
| 135 |
noMatchValues.add(value); |
|
| 136 |
} |
|
| 137 |
|
|
| 138 |
protected void updateAnaValuesIfMatch(String value) {
|
|
| 139 |
for (Pattern rule : rules.keySet()) {
|
|
| 140 |
if (rule.matcher(value).matches()) {
|
|
| 141 |
value = rules.get(rule); |
|
| 142 |
anaValues.put(newType, value); // do a replace if newType == oldType :-) |
|
| 143 |
anaResps.put(newType, "#txm_recode"); |
|
| 144 |
return; |
|
| 145 |
} |
|
| 146 |
} |
|
| 147 |
if (DELETE.equals(mode)) anaValues.put(newType, ""); // do a replace if newType == oldType :-) |
|
| 148 |
else if (ABANDON.equals(mode)) anaValues.put(newType, "ERROR("+value+")"); // do a replace if newType == oldType :-)
|
|
| 149 |
|
|
| 150 |
noMatchValues.add(value); |
|
| 151 |
} |
|
| 152 |
|
|
| 153 |
public HashSet<String> getNoMatchValues() {
|
|
| 154 |
return noMatchValues; |
|
| 155 |
} |
|
| 156 |
|
|
| 157 |
public static void main(String args[]) {
|
|
| 158 |
try {
|
|
| 159 |
File xmlFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test.xml");
|
|
| 160 |
File tmpFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test-o.xml");
|
|
| 161 |
String oldType = "type"; |
|
| 162 |
String newType = "type"; |
|
| 163 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
| 164 |
rules.put(Pattern.compile("w"), "WORD");
|
|
| 165 |
rules.put(Pattern.compile("x.+"), "XWORD");
|
|
| 166 |
rules.put(Pattern.compile("y"), "YWORD");
|
|
| 167 |
rules.put(Pattern.compile("y.*"), "YMULTIWORD");
|
|
| 168 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, ABANDON); |
|
| 169 |
System.out.println(converter.process(tmpFile)); |
|
| 170 |
} catch (Exception e) {
|
|
| 171 |
e.printStackTrace(); |
|
| 172 |
} |
|
| 173 |
} |
|
| 174 |
} |
|
| 0 | 175 | |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationSyMoGIHWriter.java (revision 850) | ||
|---|---|---|
| 1 |
package org.txm.annotation.kr.core; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.io.FileWriter; |
|
| 5 |
import java.io.IOException; |
|
| 6 |
import java.net.MalformedURLException; |
|
| 7 |
import java.util.ArrayList; |
|
| 8 |
import java.util.HashMap; |
|
| 9 |
import java.util.HashSet; |
|
| 10 |
import java.util.List; |
|
| 11 |
|
|
| 12 |
import javax.xml.stream.XMLInputFactory; |
|
| 13 |
import javax.xml.stream.XMLOutputFactory; |
|
| 14 |
import javax.xml.stream.XMLStreamException; |
|
| 15 |
import javax.xml.stream.XMLStreamWriter; |
|
| 16 |
|
|
| 17 |
import org.txm.annotation.kr.core.repository.AnnotationType; |
|
| 18 |
import org.txm.importer.StaxIdentityParser; |
|
| 19 |
|
|
| 20 |
/** |
|
| 21 |
* The Class AnnotationSyMoGIHWriter. |
|
| 22 |
* |
|
| 23 |
* @author sgedzelman, mdecorde |
|
| 24 |
* |
|
| 25 |
* copy a XML-TXM file without annotations elements |
|
| 26 |
* and creates annotations XML-TEI-SymoGIH annotation files for each annotation author |
|
| 27 |
* |
|
| 28 |
*/ |
|
| 29 |
public class AnnotationSyMoGIHWriter extends StaxIdentityParser {
|
|
| 30 |
|
|
| 31 |
File xmlStandOffDirectory; |
|
| 32 |
boolean debug = false; |
|
| 33 |
|
|
| 34 |
String currentRef ; |
|
| 35 |
String currentAuthor ; |
|
| 36 |
String currentDate; |
|
| 37 |
String currentStartPos ; |
|
| 38 |
String currentEndPos ; |
|
| 39 |
//read xmlFile, to find annotations and update/write to xmlstandofffile |
|
| 40 |
//order annotations by annotator |
|
| 41 |
////// order annotations by date |
|
| 42 |
HashSet<String> types = new HashSet<String>(); |
|
| 43 |
ArrayList<String> positions; |
|
| 44 |
HashMap<String, ArrayList<String>> annotationsPositions; |
|
| 45 |
XMLStreamWriter currentWriter; |
|
| 46 |
XMLStreamWriter standoffWriter; |
|
| 47 |
String currentType; |
|
| 48 |
boolean startAnnotation = false; |
|
| 49 |
private String filename; |
|
| 50 |
|
|
| 51 |
// author -> date -> annotation_values |
|
| 52 |
HashMap<String, HashMap<String, ArrayList<String[]>>> allannotations = new HashMap<String, HashMap<String, ArrayList<String[]>>>(); |
|
| 53 |
HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>> allannotationspositions = new HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>>(); |
|
| 54 |
private String textid; |
|
| 55 |
|
|
| 56 |
/**
 * Prepares the copy of an XML-TXM file and registers the names of the
 * elements to treat as annotations.
 *
 * @param textid identifier of the text
 * @param xmlFile the XML file to read
 * @param xmlStandOffDirectory directory kept for the stand-off files
 * @param types the annotation types; their ids are the names of the annotation elements
 * @param debug debug flag
 * @throws XMLStreamException
 * @throws IOException
 */
public AnnotationSyMoGIHWriter(String textid, File xmlFile, File xmlStandOffDirectory, List<AnnotationType> types, boolean debug) throws IOException, XMLStreamException {
	super(xmlFile.toURI().toURL()); // init reader and writer
	factory = XMLInputFactory.newInstance();

	this.xmlStandOffDirectory = xmlStandOffDirectory;
	this.debug = debug;
	this.filename = xmlFile.getName();
	this.textid = textid;

	annotationsPositions = new HashMap<String, ArrayList<String>>();

	// the annotation elements are recognized by the id of their type
	for (AnnotationType annotationType : types) {
		this.types.add(annotationType.getId());
	}
}
|
| 81 |
|
|
| 82 |
/*<TEI xmlns="http://www.tei-c.org/ns/1.0"> |
|
| 83 |
<teiHeader> |
|
| 84 |
<fileDesc> |
|
| 85 |
<titleStmt> |
|
| 86 |
<title>Title</title> |
|
| 87 |
</titleStmt> |
|
| 88 |
<publicationStmt> |
|
| 89 |
<p>Publication Information</p> |
|
| 90 |
</publicationStmt> |
|
| 91 |
<sourceDesc> |
|
| 92 |
<p>Ce document permet l'annotation sémantique de tous les textes concernant l'association avec des unités de connaissance</p> |
|
| 93 |
</sourceDesc> |
|
| 94 |
</fileDesc> |
|
| 95 |
</teiHeader> |
|
| 96 |
<text> |
|
| 97 |
<body> |
|
| 98 |
<div> |
|
| 99 |
<div> |
|
| 100 |
<!-- La date dans le header indique la date d'annotation --> |
|
| 101 |
<head> |
|
| 102 |
<date type="annotation_date" when="2016-06-16"/> |
|
| 103 |
</head> |
|
| 104 |
<span type="identification d'entités nommées" ana="CoAc56389" |
|
| 105 |
target="#w_article_baip_1254-0714_1850_num_01_005_974_tei_2152 |
|
| 106 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2153 |
|
| 107 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2154 |
|
| 108 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2155 |
|
| 109 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2156" /> |
|
| 110 |
</div> |
|
| 111 |
</div> |
|
| 112 |
</body> |
|
| 113 |
</text> |
|
| 114 |
</TEI>*/ |
|
| 115 |
|
|
| 116 |
protected void processStartElement() throws XMLStreamException, IOException {
|
|
| 117 |
//<coac author="gazelledess" ref="CoAc397" start="5" end="5"> |
|
| 118 |
|
|
| 119 |
/*if(localname.startsWith("actr")){
|
|
| 120 |
System.out.println("Check existence of actr in Corpus !!!! "+localname);
|
|
| 121 |
}*/ |
|
| 122 |
boolean foundAnnot = false; |
|
| 123 |
|
|
| 124 |
|
|
| 125 |
if (types.contains(localname) && parser.getPrefix().equals("txm")) { // don't write txm annotation elements
|
|
| 126 |
//System.out.println(" START "+ localname);
|
|
| 127 |
foundAnnot = true; |
|
| 128 |
currentType = localname; |
|
| 129 |
//<txm:actr author="gazelledess" ref="PhileasFogg" date="2016-09-05" start="56" end="57"> |
|
| 130 |
currentAuthor = parser.getAttributeValue(null, "author"); |
|
| 131 |
currentRef = parser.getAttributeValue(null, "ref"); |
|
| 132 |
currentStartPos = parser.getAttributeValue(null, "start"); |
|
| 133 |
currentEndPos = parser.getAttributeValue(null, "end"); |
|
| 134 |
currentDate = parser.getAttributeValue(null, "date"); |
|
| 135 |
//annotation is here |
|
| 136 |
startAnnotation = true; |
|
| 137 |
positions = new ArrayList<String>(); |
|
| 138 |
annotationsPositions.put(currentType, positions); |
|
| 139 |
|
|
| 140 |
// initialize allannotations |
|
| 141 |
if (!allannotations.containsKey(currentAuthor)) {
|
|
| 142 |
allannotations.put(currentAuthor, new HashMap<String, ArrayList<String[]>>()); |
|
| 143 |
allannotationspositions.put(currentAuthor, new HashMap<String, ArrayList<ArrayList<String>>>()); |
|
| 144 |
} |
|
| 145 |
HashMap<String, ArrayList<String[]>> authorsAnnotation = allannotations.get(currentAuthor); |
|
| 146 |
HashMap<String, ArrayList<ArrayList<String>>> authorsAnnotationPositions = allannotationspositions.get(currentAuthor); |
|
| 147 |
if (!authorsAnnotation.containsKey(currentDate)) {
|
|
| 148 |
authorsAnnotation.put(currentDate, new ArrayList<String[]>()); |
|
| 149 |
authorsAnnotationPositions.put(currentDate, new ArrayList<ArrayList<String>>()); |
|
| 150 |
} |
|
| 151 |
|
|
| 152 |
storeAnnotation(); |
|
| 153 |
} |
|
| 154 |
|
|
| 155 |
if (!foundAnnot) {
|
|
| 156 |
super.processStartElement(); /// continue writing in file all elements, except the tags that are now in stand-off files |
|
| 157 |
|
|
| 158 |
// get words ids of the current annotations |
|
| 159 |
if (localname.equals("w") && startAnnotation) {
|
|
| 160 |
String id = parser.getAttributeValue(null, "id"); |
|
| 161 |
for (String typeIn : annotationsPositions.keySet()) {
|
|
| 162 |
positions = annotationsPositions.get(typeIn); |
|
| 163 |
positions.add(id); |
|
| 164 |
//System.out.println("Positions of w id="+posW+" for ["+typeIn+"] ");
|
|
| 165 |
} |
|
| 166 |
} |
|
| 167 |
} |
|
| 168 |
} |
|
| 169 |
|
|
| 170 |
|
|
| 171 |
/** |
|
| 172 |
* ends the current author stand-off file |
|
| 173 |
* @param currentWriter |
|
| 174 |
*/ |
|
| 175 |
private void writeEndStandOffFile(XMLStreamWriter currentWriter){
|
|
| 176 |
//System.out.println("writeEndStandOffFile ...");
|
|
| 177 |
|
|
| 178 |
try {
|
|
| 179 |
currentWriter.writeEndElement(); |
|
| 180 |
currentWriter.writeEndDocument(); |
|
| 181 |
|
|
| 182 |
currentWriter.flush(); |
|
| 183 |
currentWriter.close(); |
|
| 184 |
} catch (XMLStreamException e) {
|
|
| 185 |
e.printStackTrace(); |
|
| 186 |
} |
|
| 187 |
|
|
| 188 |
} |
|
| 189 |
|
|
| 190 |
/** |
|
| 191 |
* Create the stand-off file for one author |
|
| 192 |
* @param file |
|
| 193 |
* @return |
|
| 194 |
*/ |
|
| 195 |
private XMLStreamWriter writeStartStandOffFile(File file){
|
|
| 196 |
//System.out.println("writeStartStandOffFile ...");
|
|
| 197 |
|
|
| 198 |
String ns = "http://www.tei-c.org/ns/1.0"; |
|
| 199 |
XMLOutputFactory output = XMLOutputFactory.newInstance(); |
|
| 200 |
XMLStreamWriter writer = null ; |
|
| 201 |
try {
|
|
| 202 |
writer = output.createXMLStreamWriter(new FileWriter(file)); |
|
| 203 |
writer.writeStartDocument(); |
|
| 204 |
writer.setPrefix("tei", ns);
|
|
| 205 |
writer.setDefaultNamespace(ns); |
|
| 206 |
|
|
| 207 |
writer.writeStartElement("TEI");
|
|
| 208 |
|
|
| 209 |
writer.writeStartElement("teiHeader");
|
|
| 210 |
writer.writeStartElement("fileDesc");
|
|
| 211 |
|
|
| 212 |
writer.writeStartElement("titleStmt");
|
|
| 213 |
writer.writeStartElement("title");
|
|
| 214 |
writer.writeCharacters(textid); |
|
| 215 |
writer.writeEndElement(); // title |
|
| 216 |
writer.writeEndElement(); // titleStmt |
|
| 217 |
|
|
| 218 |
writer.writeStartElement("publicationStmt");
|
|
| 219 |
writer.writeStartElement("p");
|
|
| 220 |
writer.writeCharacters("PUBLICATION INFO à renseigner");
|
|
| 221 |
writer.writeEndElement(); // p |
|
| 222 |
writer.writeEndElement(); // publicationStmt |
|
| 223 |
|
|
| 224 |
writer.writeStartElement("sourceDesc");
|
|
| 225 |
writer.writeStartElement("p");
|
|
| 226 |
writer.writeCharacters("Ce document permet l'annotation sémantique de tous les textes, par auteur");
|
|
| 227 |
writer.writeEndElement(); // p |
|
| 228 |
writer.writeEndElement(); // sourceDesc |
|
| 229 |
|
|
| 230 |
writer.writeEndElement(); // </fileDesc> |
|
| 231 |
writer.writeStartElement("encodingDesc");
|
|
| 232 |
writer.writeStartElement("projectDesc");
|
|
| 233 |
writer.writeStartElement("p");
|
|
| 234 |
writer.writeCharacters("Annotations created by "+currentAuthor+", for the use in Symogih XML platform");
|
|
| 235 |
writer.writeEndElement(); // p |
|
| 236 |
writer.writeEndElement(); // </projectDesc> |
|
| 237 |
writer.writeEndElement(); // </encodingDesc> |
|
| 238 |
writer.writeEndElement(); // </teiHeader> |
|
| 239 |
|
|
| 240 |
|
|
| 241 |
writer.writeStartElement("text");
|
|
| 242 |
writer.writeStartElement("body");
|
|
| 243 |
writer.writeCharacters("\n");
|
|
| 244 |
writer.writeStartElement("div");
|
|
| 245 |
writer.writeCharacters("\n");
|
|
| 246 |
} catch (XMLStreamException e) {
|
|
| 247 |
// TODO Auto-generated catch block |
|
| 248 |
e.printStackTrace(); |
|
| 249 |
} catch (IOException e) {
|
|
| 250 |
// TODO Auto-generated catch block |
|
| 251 |
e.printStackTrace(); |
|
| 252 |
} |
|
| 253 |
return writer; |
|
| 254 |
} |
|
| 255 |
|
|
| 256 |
private void storeAnnotation() {
|
|
| 257 |
allannotations.get(currentAuthor).get(currentDate).add(new String[]{currentDate, currentRef, currentType});
|
|
| 258 |
} |
|
| 259 |
|
|
| 260 |
private void storeAnnotationPositions() {
|
|
| 261 |
allannotationspositions.get(currentAuthor).get(currentDate).add(positions); |
|
| 262 |
} |
|
| 263 |
|
|
| 264 |
/** |
|
| 265 |
* write stand-off annotation |
|
| 266 |
*/ |
|
| 267 |
private void writeStartAnnotationToStandoffFile(String[] data, ArrayList<String> positions) {
|
|
| 268 |
//System.out.println("writeStartAnnotationToStandoffFile ...");
|
|
| 269 |
try {
|
|
| 270 |
currentWriter.writeStartElement("span");
|
|
| 271 |
currentWriter.writeAttribute("type","named_entities_identifications");
|
|
| 272 |
currentWriter.writeAttribute("ana", data[1]);
|
|
| 273 |
StringBuffer listWids = new StringBuffer(); |
|
| 274 |
for (String posW : positions) {
|
|
| 275 |
listWids.append("#"+posW+" ");
|
|
| 276 |
} |
|
| 277 |
currentWriter.writeAttribute("target", listWids.toString().trim());
|
|
| 278 |
currentWriter.writeComment("type="+data[2]);
|
|
| 279 |
currentWriter.writeEndElement(); // span |
|
| 280 |
currentWriter.writeCharacters("\n");
|
|
| 281 |
|
|
| 282 |
} catch (XMLStreamException e) {
|
|
| 283 |
// TODO Auto-generated catch block |
|
| 284 |
e.printStackTrace(); |
|
| 285 |
} |
|
| 286 |
} |
|
| 287 |
|
|
| 288 |
protected void processEndElement() throws XMLStreamException {
|
|
| 289 |
boolean foundAnnot = false; |
|
| 290 |
if (types.contains(localname) && parser.getPrefix().equals("txm")) { // skip annotation end element
|
|
| 291 |
//System.out.println(" END "+ localname);
|
|
| 292 |
foundAnnot = true; |
|
| 293 |
//annotation ends here |
|
| 294 |
storeAnnotationPositions(); |
|
| 295 |
|
|
| 296 |
if (annotationsPositions.containsKey(localname)) {
|
|
| 297 |
annotationsPositions.remove(localname); |
|
Formats disponibles : Unified diff