Révision 850
tmp/org.txm.annotation.kr.core/.classpath (revision 850) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<classpath> |
|
3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
5 |
<accessrules> |
|
6 |
<accessrule kind="accessible" pattern="**"/> |
|
7 |
</accessrules> |
|
8 |
</classpathentry> |
|
9 |
<classpathentry kind="src" path="src"/> |
|
10 |
<classpathentry kind="lib" path="libs/hsqldb.jar"/> |
|
11 |
<classpathentry kind="lib" path="libs/postgresql-9.4.1207.jre6.jar"/> |
|
12 |
<classpathentry kind="lib" path="libs/sqlite-jdbc-3.8.11.2.jar"/> |
|
13 |
<classpathentry kind="output" path="bin"/> |
|
14 |
</classpath> |
|
0 | 15 |
tmp/org.txm.annotation.kr.core/META-INF/persistence.xml (revision 850) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<persistence version="2.1" xmlns="http://xmlns.jcp.org/xml/ns/persistence" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/persistence http://xmlns.jcp.org/xml/ns/persistence/persistence_2_1.xsd"> |
|
3 |
<persistence-unit name="HSQLKRPERSISTENCE" transaction-type="RESOURCE_LOCAL"> |
|
4 |
<provider>org.eclipse.persistence.jpa.PersistenceProvider</provider> |
|
5 |
|
|
6 |
<class>org.txm.annotation.kr.core.Annotation</class> |
|
7 |
<class>org.txm.annotation.kr.core.repository.AnnotationType</class> |
|
8 |
<class>org.txm.annotation.kr.core.repository.TypedValue</class> |
|
9 |
<!-- |
|
10 |
<class>org.txm.functions.dictionary_jpa.EntryId</class> |
|
11 |
<class>org.txm.functions.dictionary_jpa.EntryProperty</class> |
|
12 |
<class>org.txm.functions.dictionary_jpa.EntryPropertyType</class> |
|
13 |
--> |
|
14 |
<properties> |
|
15 |
<property name="javax.persistence.jdbc.driver" value="org.hsqldb.jdbcDriver"/> |
|
16 |
<property name="javax.persistence.jdbc.url" value="jdbc:hsqldb:file:txmdb/db;shutdown=true;hsqldb.write_delay=false;hsqldb.lock_file=false;hsqldb.applog=0;hsqldb.sqllog=0"/> |
|
17 |
<property name="javax.persistence.jdbc.user" value="SA"/> |
|
18 |
|
|
19 |
<!-- <property name="eclipselink.logging.parameters" value="false"/> |
|
20 |
<property name="eclipselink.logging.level" value="ON" /> |
|
21 |
|
|
22 |
|
|
23 |
<property name="eclipselink.jdbc.read-connections.min" value="1" /> |
|
24 |
<property name="eclipselink.jdbc.write-connections.min" value="1" /> |
|
25 |
<property name="eclipselink.jdbc.batch-writing" value="JDBC" /> |
|
26 |
--> |
|
27 |
|
|
28 |
<!-- Logging |
|
29 |
<property name="eclipselink.logging.file" value="output.log"/> |
|
30 |
<property name="eclipselink.logging.logger" value="JavaLogger"/> |
|
31 |
--> |
|
32 |
|
|
33 |
<!-- |
|
34 |
<property name="eclipselink.logging.level" value="FINE" /> |
|
35 |
<property name="eclipselink.logging.timestamp" value="false" /> |
|
36 |
<property name="eclipselink.logging.session" value="false" /> |
|
37 |
<property name="eclipselink.logging.thread" value="false" /> |
|
38 |
--> |
|
39 |
|
|
40 |
<!-- <property name="eclipselink.ddl-generation" value="drop-and-create-tables"/> --> |
|
41 |
<!-- <property name="eclipselink.ddl-generation.output-mode" value="database"/> --> |
|
42 |
</properties> |
|
43 |
|
|
44 |
</persistence-unit> |
|
45 |
</persistence> |
|
0 | 46 |
tmp/org.txm.annotation.kr.core/META-INF/MANIFEST.MF (revision 850) | ||
---|---|---|
1 |
Manifest-Version: 1.0 |
|
2 |
Require-Bundle: org.txm.core;bundle-version="0.7.0";visibility:=reexport, |
|
3 |
org.txm.searchengine.cqp.core;visibility:=reexport, |
|
4 |
org.txm.utils;bundle-version="1.0.0";visibility:=reexport, |
|
5 |
javax.persistence;bundle-version="2.1.0";visibility:=reexport, |
|
6 |
org.eclipse.persistence.asm;bundle-version="3.3.1";visibility:=reexport, |
|
7 |
org.eclipse.persistence.jpa;bundle-version="2.6.0";visibility:=reexport, |
|
8 |
org.eclipse.persistence.jpa.jpql;bundle-version="2.6.0";visibility:=reexport, |
|
9 |
org.eclipse.persistence.antlr;bundle-version="3.2.0";visibility:=reexport, |
|
10 |
org.eclipse.osgi;bundle-version="3.10.2";visibility:=reexport, |
|
11 |
org.txm.searchengine.core;bundle-version="1.0.0";visibility:=reexport, |
|
12 |
org.eclipse.ui.workbench;visibility:=reexport, |
|
13 |
org.txm.annotation.core;visibility:=reexport |
|
14 |
Export-Package: org.hsqldb, |
|
15 |
org.hsqldb.auth, |
|
16 |
org.hsqldb.dbinfo, |
|
17 |
org.hsqldb.error, |
|
18 |
org.hsqldb.index, |
|
19 |
org.hsqldb.jdbc, |
|
20 |
org.hsqldb.jdbc.pool, |
|
21 |
org.hsqldb.lib, |
|
22 |
org.hsqldb.lib.java, |
|
23 |
org.hsqldb.lib.tar, |
|
24 |
org.hsqldb.map, |
|
25 |
org.hsqldb.navigator, |
|
26 |
org.hsqldb.persist, |
|
27 |
org.hsqldb.resources, |
|
28 |
org.hsqldb.result, |
|
29 |
org.hsqldb.rights, |
|
30 |
org.hsqldb.rowio, |
|
31 |
org.hsqldb.scriptio, |
|
32 |
org.hsqldb.server, |
|
33 |
org.hsqldb.types, |
|
34 |
org.hsqldb.util, |
|
35 |
org.postgresql, |
|
36 |
org.postgresql.copy, |
|
37 |
org.postgresql.core, |
|
38 |
org.postgresql.core.v2, |
|
39 |
org.postgresql.core.v3, |
|
40 |
org.postgresql.ds, |
|
41 |
org.postgresql.ds.common, |
|
42 |
org.postgresql.fastpath, |
|
43 |
org.postgresql.geometric, |
|
44 |
org.postgresql.gss, |
|
45 |
org.postgresql.hostchooser, |
|
46 |
org.postgresql.jdbc, |
|
47 |
org.postgresql.jdbc2, |
|
48 |
org.postgresql.jdbc2.optional, |
|
49 |
org.postgresql.jdbc3, |
|
50 |
org.postgresql.largeobject, |
|
51 |
org.postgresql.osgi, |
|
52 |
org.postgresql.ssl, |
|
53 |
org.postgresql.ssl.jdbc4, |
|
54 |
org.postgresql.sspi, |
|
55 |
org.postgresql.translation, |
|
56 |
org.postgresql.util, |
|
57 |
org.postgresql.xa, |
|
58 |
org.sqlite, |
|
59 |
org.sqlite.core, |
|
60 |
org.sqlite.date, |
|
61 |
org.sqlite.javax, |
|
62 |
org.sqlite.jdbc3, |
|
63 |
org.sqlite.jdbc4, |
|
64 |
org.sqlite.util, |
|
65 |
org.txm.annotation.kr.core, |
|
66 |
org.txm.annotation.kr.core.conversion, |
|
67 |
org.txm.annotation.kr.core.preferences, |
|
68 |
org.txm.annotation.kr.core.repository, |
|
69 |
org.txm.annotation.kr.core.storage.temporary, |
|
70 |
org.txm.annotation.kr.core.temporary |
|
71 |
Bundle-ActivationPolicy: lazy |
|
72 |
Bundle-ClassPath: .,libs/hsqldb.jar,libs/postgresql-9.4.1207.jre6.jar, |
|
73 |
libs/sqlite-jdbc-3.8.11.2.jar |
|
74 |
Bundle-Version: 1.0.0.qualifier |
|
75 |
Bundle-Name: org.txm.annotation.kr.core |
|
76 |
Bundle-ManifestVersion: 2 |
|
77 |
Bundle-Activator: org.txm.annotation.kr.core.Activator |
|
78 |
Bundle-SymbolicName: org.txm.annotation.kr.core;singleton:=true |
|
79 |
Meta-Persistence: META-INF/persistence.xml |
|
80 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.7 |
|
81 |
|
|
0 | 82 |
tmp/org.txm.annotation.kr.core/.project (revision 850) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<projectDescription> |
|
3 |
<name>org.txm.annotation.kr.core</name> |
|
4 |
<comment></comment> |
|
5 |
<projects> |
|
6 |
</projects> |
|
7 |
<buildSpec> |
|
8 |
<buildCommand> |
|
9 |
<name>org.eclipse.jdt.core.javabuilder</name> |
|
10 |
<arguments> |
|
11 |
</arguments> |
|
12 |
</buildCommand> |
|
13 |
<buildCommand> |
|
14 |
<name>org.eclipse.pde.ManifestBuilder</name> |
|
15 |
<arguments> |
|
16 |
</arguments> |
|
17 |
</buildCommand> |
|
18 |
<buildCommand> |
|
19 |
<name>org.eclipse.pde.SchemaBuilder</name> |
|
20 |
<arguments> |
|
21 |
</arguments> |
|
22 |
</buildCommand> |
|
23 |
</buildSpec> |
|
24 |
<natures> |
|
25 |
<nature>org.eclipse.pde.PluginNature</nature> |
|
26 |
<nature>org.eclipse.jdt.core.javanature</nature> |
|
27 |
</natures> |
|
28 |
</projectDescription> |
|
0 | 29 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/AllTests.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core.temporary; |
|
2 |
|
|
3 |
import org.junit.runner.RunWith; |
|
4 |
import org.junit.runners.Suite; |
|
5 |
import org.junit.runners.Suite.SuiteClasses; |
|
6 |
import org.txm.StartToolbox; |
|
7 |
|
|
8 |
@RunWith(Suite.class) |
|
9 |
@SuiteClasses({ StartToolbox.class, CreateAnnotation.class, DeleteAnnotation.class, |
|
10 |
UpdateAnnotation.class }) |
|
11 |
public class AllTests { |
|
12 |
|
|
13 |
} |
|
0 | 14 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/DeleteAnnotation.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core.temporary; |
|
2 |
|
|
3 |
import static org.junit.Assert.*; |
|
4 |
|
|
5 |
import org.junit.Test; |
|
6 |
|
|
7 |
public class DeleteAnnotation { |
|
8 |
|
|
9 |
@Test |
|
10 |
public void test() { |
|
11 |
fail("Not yet implemented"); |
|
12 |
} |
|
13 |
|
|
14 |
} |
|
0 | 15 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/UpdateAnnotation.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core.temporary; |
|
2 |
|
|
3 |
import static org.junit.Assert.*; |
|
4 |
|
|
5 |
import org.junit.Test; |
|
6 |
|
|
7 |
public class UpdateAnnotation { |
|
8 |
|
|
9 |
@Test |
|
10 |
public void test() { |
|
11 |
fail("Not yet implemented"); |
|
12 |
} |
|
13 |
|
|
14 |
} |
|
0 | 15 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/CreateAnnotation.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core.temporary; |
|
2 |
|
|
3 |
import static org.junit.Assert.*; |
|
4 |
|
|
5 |
import java.util.HashMap; |
|
6 |
|
|
7 |
import org.junit.Test; |
|
8 |
import org.txm.Toolbox; |
|
9 |
import org.txm.annotation.kr.core.storage.temporary.TemporaryAnnotationManager; |
|
10 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
11 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
|
12 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
|
13 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
14 |
|
|
15 |
public class CreateAnnotation { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void test() throws CqiClientException, InvalidCqpIdException { |
|
19 |
if (!Toolbox.isInitialized()) fail("Toolbox not initialized."); |
|
20 |
MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus("VOEUX"); |
|
21 |
if (corpus == null) fail("Corpus Voeux not loaded."); |
|
22 |
|
|
23 |
HashMap<String, Object> properties = TemporaryAnnotationManager.getInitialisationProperties(this.getClass(), corpus); |
|
24 |
properties.put("eclipselink.persistencexml", System.getProperty("user.home")+"/workspace442/org.txm.core/META-INF/persistence.xml"); |
|
25 |
|
|
26 |
TemporaryAnnotationManager tam = new TemporaryAnnotationManager(corpus, properties); |
|
27 |
System.out.println(tam); |
|
28 |
} |
|
29 |
|
|
30 |
} |
|
0 | 31 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationComparator.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core; |
|
2 |
|
|
3 |
import java.text.DateFormat; |
|
4 |
import java.text.SimpleDateFormat; |
|
5 |
import java.util.Comparator; |
|
6 |
import java.util.StringTokenizer; |
|
7 |
|
|
8 |
public class AnnotationComparator implements Comparator<Annotation> { |
|
9 |
|
|
10 |
public int compare(Annotation a1, Annotation a2) { |
|
11 |
// comparer e1 et e2 |
|
12 |
|
|
13 |
DateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd"); |
|
14 |
//System.out.println("Date 1 : "+a1.getDate() + " => "+dateformat.format(a1.getDate())); |
|
15 |
//System.out.println("Date 2 : "+a2.getDate() + " => "+dateformat.format(a2.getDate())); |
|
16 |
StringTokenizer tokenizer1 = new StringTokenizer(dateformat.format(a1.getDate()), "-"); |
|
17 |
StringTokenizer tokenizer2 = new StringTokenizer(dateformat.format(a2.getDate()), "-"); |
|
18 |
|
|
19 |
for(int i = 0 ; i < 2 && tokenizer1.hasMoreTokens() && tokenizer2.hasMoreTokens() ; ++i) { |
|
20 |
String token1 = tokenizer1.nextToken(); |
|
21 |
String token2 = tokenizer2.nextToken(); |
|
22 |
int valint1 = new Integer(token1).intValue(); |
|
23 |
int valint2 = new Integer(token2).intValue(); |
|
24 |
//System.out.println(valint1+" | "+valint2); |
|
25 |
if (valint1<valint2){ |
|
26 |
return -1; |
|
27 |
}else { |
|
28 |
if(valint1>valint2){ |
|
29 |
return 1; |
|
30 |
}else { |
|
31 |
//System.out.println("idem ["+i+"]"); |
|
32 |
} |
|
33 |
|
|
34 |
} |
|
35 |
} |
|
36 |
|
|
37 |
return 0; |
|
38 |
|
|
39 |
} |
|
40 |
|
|
41 |
public static void main(String[] args) { |
|
42 |
AnnotationComparator comp = new AnnotationComparator(); |
|
43 |
Annotation a1 = new Annotation("truc", "bidule", 3, 10); |
|
44 |
Annotation a2 = new Annotation("truc", "bidule", 6, 8); |
|
45 |
int ret = comp.compare(a1, a2); |
|
46 |
System.out.println("Le retour est : "+ret); |
|
47 |
} |
|
48 |
} |
|
0 | 49 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/DatabasePersistenceManager.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core; |
|
2 |
|
|
3 |
import java.util.HashMap; |
|
4 |
|
|
5 |
import javax.persistence.EntityManager; |
|
6 |
|
|
7 |
public class DatabasePersistenceManager { |
|
8 |
|
|
9 |
//Object can be Corpus or KnowledgeRepository |
|
10 |
protected HashMap<Object, EntityManager> managers; |
|
11 |
public static final String PERSISTENCE_UNIT_NAME = "HSQLKRPERSISTENCE"; |
|
12 |
public static String ACCESS_SQL = "sql"; |
|
13 |
public static String ACCESS_FILE = "file"; |
|
14 |
public static String ACCESS_SPARQL = "sparql"; |
|
15 |
|
|
16 |
/** |
|
17 |
* Instantiates a new database manager. |
|
18 |
*/ |
|
19 |
public DatabasePersistenceManager() { |
|
20 |
managers = new HashMap<Object, EntityManager>() ; |
|
21 |
} |
|
22 |
|
|
23 |
/** |
|
24 |
* The Object can be a Corpus or a KnowledgeRepository |
|
25 |
* @param obj |
|
26 |
* @return |
|
27 |
*/ |
|
28 |
public EntityManager getJPAEntityManager(Object obj){ |
|
29 |
if (managers.containsKey(obj)) { |
|
30 |
return managers.get(obj); |
|
31 |
} |
|
32 |
return null; |
|
33 |
} |
|
34 |
|
|
35 |
public void closeManager(Object key) { |
|
36 |
if (!managers.keySet().contains(key)) return; |
|
37 |
|
|
38 |
managers.get(key).close(); |
|
39 |
managers.remove(key); |
|
40 |
} |
|
41 |
|
|
42 |
public void closeAllManagers() { |
|
43 |
for (Object key : managers.keySet()) { |
|
44 |
EntityManager m = managers.get(key); |
|
45 |
m.flush(); |
|
46 |
m.createNativeQuery("SHUTDOWN;"); |
|
47 |
m.close(); |
|
48 |
} |
|
49 |
managers.clear(); |
|
50 |
} |
|
51 |
} |
|
0 | 52 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationManager.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.util.ArrayList; |
|
5 |
import java.util.Arrays; |
|
6 |
import java.util.HashMap; |
|
7 |
import java.util.List; |
|
8 |
|
|
9 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
10 |
import org.txm.annotation.kr.core.repository.AnnotationEffect; |
|
11 |
import org.txm.annotation.kr.core.repository.AnnotationType; |
|
12 |
import org.txm.annotation.kr.core.repository.TypedValue; |
|
13 |
import org.txm.annotation.kr.core.storage.temporary.TemporaryAnnotationManager; |
|
14 |
import org.txm.core.engines.Engine; |
|
15 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
16 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
17 |
import org.txm.utils.logger.Log; |
|
18 |
|
|
19 |
/** |
|
20 |
* Manage annotations and is able to return annotation saved in JPA |
|
21 |
* |
|
22 |
* @author mdecorde |
|
23 |
* |
|
24 |
*/ |
|
25 |
public class AnnotationManager implements Engine { |
|
26 |
|
|
27 |
MainCorpus corpus; |
|
28 |
TemporaryAnnotationManager tempManager; |
|
29 |
CQPAnnotationManager cqpManager; |
|
30 |
boolean dirty = false; |
|
31 |
|
|
32 |
public AnnotationManager(MainCorpus mainCorpus){ |
|
33 |
this.corpus = mainCorpus; |
|
34 |
} |
|
35 |
|
|
36 |
public TemporaryAnnotationManager getTemporaryManager(){ |
|
37 |
return tempManager; |
|
38 |
} |
|
39 |
|
|
40 |
public CQPAnnotationManager getCQPManager(){ |
|
41 |
return cqpManager; |
|
42 |
} |
|
43 |
|
|
44 |
public boolean saveAnnotations() throws Exception{ |
|
45 |
List<Annotation> annots = tempManager.getAnnotations(); |
|
46 |
if (annots.isEmpty()) { |
|
47 |
System.out.println("No annotation to save. Aborting."); |
|
48 |
dirty = false; |
|
49 |
return true; |
|
50 |
} |
|
51 |
|
|
52 |
AnnotationWriter writer = new AnnotationWriter(corpus); |
|
53 |
if (writer.writeAnnotations(annots)) { |
|
54 |
Log.info("Annotations succesfully written. Deleting temporary annotations..."); |
|
55 |
tempManager.deleteAnnotations(); |
|
56 |
dirty = false; |
|
57 |
return true; |
|
58 |
} |
|
59 |
return false; |
|
60 |
} |
|
61 |
|
|
62 |
public boolean exportAnnotationsToSyMoGIH(File resultZipFile) throws Exception{ |
|
63 |
AnnotationWriter writer = new AnnotationWriter(corpus); |
|
64 |
|
|
65 |
if (writer.writeAnnotationsInStandoff(resultZipFile)) { |
|
66 |
Log.info("Annotations succesfully written in "+resultZipFile); |
|
67 |
return true; |
|
68 |
} |
|
69 |
return false; |
|
70 |
} |
|
71 |
|
|
72 |
//TODO: not ended? |
|
73 |
/** |
|
74 |
* Deletes the annotations stored in the temporary annotation manager |
|
75 |
* @param type |
|
76 |
* @param job |
|
77 |
* @return |
|
78 |
* @throws Exception |
|
79 |
*/ |
|
80 |
public boolean deleteAnnotations(AnnotationType type, IProgressMonitor job) throws Exception { |
|
81 |
List<Annotation> temporaryAnnotations = null; |
|
82 |
List<Annotation> cqpAnnotations = null; |
|
83 |
try { |
|
84 |
temporaryAnnotations = tempManager.getAnnotations(type); |
|
85 |
tempManager.getEntityManager().getTransaction().begin(); |
|
86 |
for (Annotation a : temporaryAnnotations){ |
|
87 |
if (job != null && job.isCanceled()) { |
|
88 |
System.out.println("Delete annotation canceled."); |
|
89 |
return false; |
|
90 |
} |
|
91 |
tempManager.deleteAnnotation(type, a.getStart(), a.getEnd()); |
|
92 |
} |
|
93 |
tempManager.getEntityManager().getTransaction().commit(); |
|
94 |
|
|
95 |
cqpAnnotations = cqpManager.getAnnotations(type); |
|
96 |
tempManager.getEntityManager().getTransaction().begin(); |
|
97 |
for (Annotation a : cqpAnnotations) { |
|
98 |
if (job != null && job.isCanceled()) { |
|
99 |
System.out.println("Delete annotation canceled."); |
|
100 |
return false; |
|
101 |
} |
|
102 |
String value = cqpManager.getCQPAnnotationValue(a.getStart(), a.getEnd(), type); |
|
103 |
if (value != null) { |
|
104 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), a.getStart(), a.getEnd()); |
|
105 |
} else { |
|
106 |
tempManager.deleteAnnotationNoCommit(type, a.getStart(), a.getEnd()); |
|
107 |
} |
|
108 |
} |
|
109 |
dirty = true; |
|
110 |
tempManager.getEntityManager().getTransaction().commit(); |
|
111 |
} catch(Exception e) { |
|
112 |
e.printStackTrace(); |
|
113 |
return false; |
|
114 |
} |
|
115 |
return true; |
|
116 |
} |
|
117 |
|
|
118 |
public boolean deleteAnnotations(AnnotationType type, List<Match> matches, IProgressMonitor job) throws Exception { |
|
119 |
try { |
|
120 |
tempManager.getEntityManager().getTransaction().begin(); |
|
121 |
for (Match m : matches) { |
|
122 |
if (job != null && job.isCanceled()) { |
|
123 |
System.out.println("Delete annotation canceled."); |
|
124 |
return false; |
|
125 |
} |
|
126 |
|
|
127 |
String value = cqpManager.getCQPAnnotationValue(m.getStart(), m.getEnd(), type); |
|
128 |
|
|
129 |
if (value != null) { |
|
130 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), m.getStart(), m.getEnd()); |
|
131 |
} else { |
|
132 |
tempManager.deleteAnnotationNoCommit(type, m.getStart(), m.getEnd()); |
|
133 |
} |
|
134 |
} |
|
135 |
dirty = true; |
|
136 |
tempManager.getEntityManager().getTransaction().commit(); |
|
137 |
} catch (Exception e) { |
|
138 |
e.printStackTrace(); |
|
139 |
return false; |
|
140 |
} |
|
141 |
return true; |
|
142 |
} |
|
143 |
|
|
144 |
/** |
|
145 |
* Returns the annotation saved in the temporary database and in the CQP corpus indexes |
|
146 |
* |
|
147 |
* CQP Annotations must be shadowed by temporary annotations of the same type and positions |
|
148 |
*/ |
|
149 |
public List<Annotation> getAnnotationsForMatches(AnnotationType type, List<Match> matches, boolean overlap) { |
|
150 |
List<Annotation> temporaryAnnotations = null; |
|
151 |
List<Annotation> resultAnnotations = new ArrayList<Annotation>(); |
|
152 |
try { |
|
153 |
temporaryAnnotations = tempManager.getAnnotations(type, matches, null, false, overlap); |
|
154 |
temporaryAnnotations = tempManager.getAnnotationsForMatches(matches, temporaryAnnotations, overlap); |
|
155 |
|
|
156 |
List<Annotation> cqpAnnotations = cqpManager.getAnnotationsForMatches(matches, type, overlap); |
|
157 |
|
|
158 |
// System.out.println("Temporary annotations: "+temporaryAnnotations); |
|
159 |
// System.out.println("CQP annotations: "+cqpAnnotations); |
|
160 |
if (cqpAnnotations.size() != matches.size() || temporaryAnnotations.size() != matches.size()) { |
|
161 |
System.out.println("ERROR in getAnnotationsForMatches methods! "); |
|
162 |
return new ArrayList<Annotation>(matches.size()); |
|
163 |
} |
|
164 |
// merge the 2 results |
|
165 |
for (int i = 0 ; i < matches.size() ; i++) { |
|
166 |
if (cqpAnnotations.get(i) == null && temporaryAnnotations.get(i) == null) { |
|
167 |
resultAnnotations.add(null); |
|
168 |
} else if (temporaryAnnotations.get(i) != null) { |
|
169 |
resultAnnotations.add(temporaryAnnotations.get(i)); |
|
170 |
} else if (cqpAnnotations.get(i) != null) { |
|
171 |
resultAnnotations.add(cqpAnnotations.get(i)); |
|
172 |
} else { |
|
173 |
resultAnnotations.add(null); |
|
174 |
} |
|
175 |
} |
|
176 |
} catch (Exception e) { |
|
177 |
e.printStackTrace(); |
|
178 |
return new ArrayList<Annotation>(matches.size()); |
|
179 |
} |
|
180 |
return resultAnnotations; |
|
181 |
} |
|
182 |
|
|
183 |
public void clearInstance() { |
|
184 |
try { |
|
185 |
tempManager.close(); |
|
186 |
} catch(Exception e) { |
|
187 |
System.out.println("Fail to clear AnnotationManager instance: "+e); |
|
188 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
189 |
} |
|
190 |
} |
|
191 |
|
|
192 |
public void checkData() { |
|
193 |
try { |
|
194 |
tempManager.checkData(); |
|
195 |
} catch (Exception e) { |
|
196 |
// TODO Auto-generated catch block |
|
197 |
e.printStackTrace(); |
|
198 |
} |
|
199 |
} |
|
200 |
|
|
201 |
public boolean hasChanges() { |
|
202 |
return tempManager.hasChanges(); |
|
203 |
} |
|
204 |
|
|
205 |
/** |
|
206 |
* |
|
207 |
* @param annotSelectedType not null |
|
208 |
* @param annotSelectedTypedValue not null |
|
209 |
* @param matches not null |
|
210 |
* @param job may be null |
|
211 |
* @return |
|
212 |
*/ |
|
213 |
public HashMap<Match,List<Annotation>> createAnnotations(AnnotationType annotSelectedType, |
|
214 |
TypedValue annotSelectedTypedValue, List<Match> matches, IProgressMonitor job) { |
|
215 |
|
|
216 |
HashMap<Match,List<Annotation>> allAnnotationsThatCollides = new HashMap<Match, List<Annotation>>(); |
|
217 |
tempManager.getEntityManager().getTransaction().begin(); // warning |
|
218 |
for (Match match : matches) { |
|
219 |
allAnnotationsThatCollides.put(match, new ArrayList<Annotation>()); |
|
220 |
|
|
221 |
if (job != null && job.isCanceled()) { // check if user canceled the job |
|
222 |
System.out.println("Affect annotation canceled."); |
|
223 |
return null; |
|
224 |
} |
|
225 |
|
|
226 |
try { |
|
227 |
List<Annotation> cqpAnnotations = null; |
|
228 |
if (annotSelectedType.getEffect().equals(AnnotationEffect.SEGMENT)) { |
|
229 |
cqpAnnotations = cqpManager.getAnnotations(null, match, null, true); // get all annotations |
|
230 |
// remove A)the wrapping annotations and B) the annotation with same type and same positions |
|
231 |
for (int i = 0 ; i < cqpAnnotations.size() ; i++) { |
|
232 |
Annotation a = cqpAnnotations.get(i); |
|
233 |
|
|
234 |
// exact match + exact type |
|
235 |
if (a.getType().equals(annotSelectedType.getId()) && a.getStart() == match.getStart() && a.getEnd() == match.getEnd()) { |
|
236 |
cqpAnnotations.remove(i); |
|
237 |
i--; |
|
238 |
} else if (!a.getType().equals(annotSelectedType.getId()) && (// different type and inner or outer wrap |
|
239 |
(a.getStart() <= match.getStart() && match.getEnd() <= a.getEnd()) || |
|
240 |
(match.getStart() <= a.getStart() && a.getEnd() <= match.getEnd()) |
|
241 |
)) { |
|
242 |
cqpAnnotations.remove(i); |
|
243 |
i--; |
|
244 |
} |
|
245 |
} |
|
246 |
} else { |
|
247 |
// no need to test collision (AnnotationType=TOKEN) |
|
248 |
cqpAnnotations = new ArrayList<Annotation>(); |
|
249 |
} |
|
250 |
|
|
251 |
if (cqpAnnotations.size() > 0) { |
|
252 |
allAnnotationsThatCollides.get(match).addAll(cqpAnnotations); |
|
253 |
continue; // don't create annotation, process next match |
|
254 |
} else { // test with temporary annotation manager |
|
255 |
List<Annotation> tempAnnotations = null; |
|
256 |
if (match.getTarget() >= 0) { // the @ operator has been used, annotate only the @position |
|
257 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, match.getTarget(), match.getTarget()); |
|
258 |
} else { |
|
259 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, match.getStart(), match.getEnd()); |
|
260 |
} |
|
261 |
if (tempAnnotations.size() > 0) |
|
262 |
allAnnotationsThatCollides.get(match).addAll(tempAnnotations); |
|
263 |
} |
|
264 |
} catch (Exception e) { |
|
265 |
Log.printStackTrace(e); |
|
266 |
System.out.println("Error during annotation creation: "+e); |
|
267 |
} |
|
268 |
|
|
269 |
if (allAnnotationsThatCollides.get(match).size() == 0) allAnnotationsThatCollides.remove(match); // keep only colision lists |
|
270 |
} |
|
271 |
dirty = true; |
|
272 |
tempManager.getEntityManager().getTransaction().commit(); // warning |
|
273 |
// test if there are CQP annotations for the |
|
274 |
return allAnnotationsThatCollides; |
|
275 |
} |
|
276 |
|
|
277 |
public List<Annotation> getAnnotations(AnnotationType type, int start, int end, boolean overlap) { |
|
278 |
List<Annotation> temporaryAnnotations = null; |
|
279 |
List<Annotation> cqpAnnotations = null; |
|
280 |
try { |
|
281 |
temporaryAnnotations = tempManager.getAnnotations(type, Arrays.asList(new Match(start, end)), null, false, overlap); |
|
282 |
cqpAnnotations = cqpManager.getAnnotations(type, start, end, overlap); |
|
283 |
|
|
284 |
int i = 0; |
|
285 |
for (Annotation a : cqpAnnotations) { |
|
286 |
while (temporaryAnnotations.get(i).getStart() < a.getStart()) { |
|
287 |
i++; |
|
288 |
} |
|
289 |
temporaryAnnotations.add(i, a); |
|
290 |
} |
|
291 |
} catch(Exception e) { |
|
292 |
|
|
293 |
} |
|
294 |
return temporaryAnnotations; |
|
295 |
} |
|
296 |
|
|
297 |
public List<Annotation> getAnnotations(AnnotationType type, int i, int j) { |
|
298 |
return getAnnotations(type, i, j, false); |
|
299 |
} |
|
300 |
|
|
301 |
public void closeAll() { |
|
302 |
Log.info("Closing annotation manager of "+corpus); |
|
303 |
tempManager.close(); |
|
304 |
cqpManager.close(); |
|
305 |
} |
|
306 |
|
|
307 |
public boolean isOpen() { |
|
308 |
return tempManager.getEntityManager() != null && tempManager.getEntityManager().isOpen(); |
|
309 |
} |
|
310 |
|
|
311 |
@Override |
|
312 |
public String getName() { |
|
313 |
return "Annotation"; |
|
314 |
} |
|
315 |
|
|
316 |
@Override |
|
317 |
public boolean getState() { |
|
318 |
return isOpen(); |
|
319 |
} |
|
320 |
|
|
321 |
@Override |
|
322 |
public boolean initialize() throws Exception { |
|
323 |
tempManager = new TemporaryAnnotationManager(corpus); |
|
324 |
dirty = tempManager.getAnnotations().size() > 0; |
|
325 |
cqpManager = new CQPAnnotationManager(corpus); |
|
326 |
return false; |
|
327 |
} |
|
328 |
|
|
329 |
@Override |
|
330 |
public boolean start(IProgressMonitor arg0) throws Exception { |
|
331 |
return true; |
|
332 |
} |
|
333 |
|
|
334 |
@Override |
|
335 |
public boolean stop() throws Exception { |
|
336 |
return true; |
|
337 |
} |
|
338 |
|
|
339 |
/** |
|
340 |
* |
|
341 |
* @return |
|
342 |
*/ |
|
343 |
public boolean isDirty() { |
|
344 |
return dirty; |
|
345 |
} |
|
346 |
} |
|
0 | 347 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationPK.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core; |
|
2 |
|
|
3 |
import java.io.Serializable; |
|
4 |
|
|
5 |
import javax.persistence.*; |
|
6 |
|
|
7 |
@Embeddable |
|
8 |
public class AnnotationPK implements Serializable { |
|
9 |
|
|
10 |
private static final long serialVersionUID = -2360693333015275209L; |
|
11 |
|
|
12 |
//corresponding to the start and end positions (in the corpus) |
|
13 |
private int startpos; |
|
14 |
private int endpos; |
|
15 |
private String refType; |
|
16 |
|
|
17 |
public AnnotationPK() { |
|
18 |
} |
|
19 |
|
|
20 |
public AnnotationPK(int start, int end, String refType) { |
|
21 |
this.startpos = start; |
|
22 |
this.endpos = end; |
|
23 |
this.refType = refType; |
|
24 |
} |
|
25 |
|
|
26 |
public void setRefType(String refType) { |
|
27 |
this.refType = refType; |
|
28 |
} |
|
29 |
|
|
30 |
public void setStartPosition(int start) { |
|
31 |
this.startpos = start; |
|
32 |
} |
|
33 |
|
|
34 |
public void setEndPosition(int end) { |
|
35 |
this.endpos = end; |
|
36 |
} |
|
37 |
|
|
38 |
public String getRefType() { |
|
39 |
return refType; |
|
40 |
} |
|
41 |
|
|
42 |
public int getStartPosition() { |
|
43 |
return startpos; |
|
44 |
} |
|
45 |
|
|
46 |
public int getEndPosition() { |
|
47 |
return endpos; |
|
48 |
} |
|
49 |
|
|
50 |
public boolean equals(Object obj) { |
|
51 |
if (obj == null) return false; |
|
52 |
if (obj == this) return true; |
|
53 |
if (!(obj instanceof AnnotationPK)) return false; |
|
54 |
|
|
55 |
AnnotationPK other = (AnnotationPK) obj; |
|
56 |
return startpos == other.startpos && endpos == other.endpos&& refType.equals(other.refType); |
|
57 |
} |
|
58 |
|
|
59 |
public int hashCode() { |
|
60 |
return refType.hashCode()+startpos+endpos; |
|
61 |
} |
|
62 |
|
|
63 |
public String toString() { |
|
64 |
return getRefType() + "["+getStartPosition()+"-"+getEndPosition()+"]" ; |
|
65 |
} |
|
66 |
} |
|
0 | 67 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/CorpusRuledConvertion.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core.conversion; |
|
2 |
|
|
3 |
import java.io.BufferedReader; |
|
4 |
import java.io.File; |
|
5 |
import java.io.IOException; |
|
6 |
import java.util.HashSet; |
|
7 |
import java.util.LinkedHashMap; |
|
8 |
import java.util.regex.Pattern; |
|
9 |
|
|
10 |
import javax.xml.stream.XMLStreamException; |
|
11 |
|
|
12 |
import org.apache.commons.lang.StringUtils; |
|
13 |
import org.txm.Toolbox; |
|
14 |
import org.txm.core.preferences.TBXPreferences; |
|
15 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
16 |
import org.txm.stat.utils.ConsoleProgressBar; |
|
17 |
import org.txm.utils.io.IOUtils; |
|
18 |
|
|
19 |
public class CorpusRuledConvertion { |
|
20 |
|
|
21 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
22 |
private String oldType; |
|
23 |
private String newType; |
|
24 |
|
|
25 |
public CorpusRuledConvertion(File conversionFile, String oldType, String newType) throws IOException { |
|
26 |
this.oldType = oldType; |
|
27 |
this.newType = newType; |
|
28 |
|
|
29 |
BufferedReader reader = IOUtils.getReader(conversionFile); |
|
30 |
String line = reader.readLine(); |
|
31 |
while (line != null) { |
|
32 |
int idx = line.indexOf("\t"); |
|
33 |
if (idx > 0) { |
|
34 |
String k = line.substring(0, idx); |
|
35 |
String v = line.substring(idx +1); |
|
36 |
rules.put(Pattern.compile(k), v); |
|
37 |
} |
|
38 |
line = reader.readLine(); |
|
39 |
} |
|
40 |
|
|
41 |
System.out.println("Conversion rules: "+rules); |
|
42 |
} |
|
43 |
|
|
44 |
public CorpusRuledConvertion(LinkedHashMap<Pattern, String> rules, |
|
45 |
String oldType, String newType) { |
|
46 |
this.oldType = oldType; |
|
47 |
this.newType = newType; |
|
48 |
|
|
49 |
this.rules = rules; |
|
50 |
} |
|
51 |
|
|
52 |
public boolean process(MainCorpus corpus) throws XMLStreamException, IOException { |
|
53 |
File binaryCorpusDirectory = corpus.getBaseDirectory(); |
|
54 |
File txmDirectory = new File(binaryCorpusDirectory, "txm"); |
|
55 |
if (!txmDirectory.exists()) { |
|
56 |
System.out.println("'txm' directory not found in "+binaryCorpusDirectory.getAbsolutePath()); |
|
57 |
return false; |
|
58 |
} |
|
59 |
File txmCorpusDirectory = new File(txmDirectory, corpus.getName()); |
|
60 |
if (!txmCorpusDirectory.exists()) { |
|
61 |
System.out.println("'"+corpus.getName()+"' corpus directory not found in "+txmDirectory.getAbsolutePath()); |
|
62 |
return false; |
|
63 |
} |
|
64 |
File[] files = txmCorpusDirectory.listFiles(); |
|
65 |
if (files == null || files.length == 0) { |
|
66 |
System.out.println("No file in "+txmCorpusDirectory); |
|
67 |
return false; |
|
68 |
} |
|
69 |
ConsoleProgressBar cpb = new ConsoleProgressBar(files.length); |
|
70 |
for (File xmlFile : files) { |
|
71 |
cpb.tick(); |
|
72 |
if (xmlFile.isDirectory()) continue; |
|
73 |
if (xmlFile.isHidden()) continue; |
|
74 |
if (!xmlFile.getName().endsWith(".xml")) continue; |
|
75 |
|
|
76 |
File tmpFile = new File(xmlFile.getParentFile(), "tmp_"+xmlFile.getName()); |
|
77 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, XMLTXMFileRuledConversion.ABANDON); |
|
78 |
if (converter.process(tmpFile)) { |
|
79 |
xmlFile.delete(); |
|
80 |
// try { |
|
81 |
// FileCopy.copy(tmpFile, new File("/tmp/"+tmpFile.getName())); |
|
82 |
// } catch (IOException e) { |
|
83 |
// // TODO Auto-generated catch block |
|
84 |
// e.printStackTrace(); |
|
85 |
// } |
|
86 |
|
|
87 |
HashSet<String> errors = converter.getNoMatchValues(); |
|
88 |
if (errors.size() > 0) { |
|
89 |
System.out.println("Some values did not match rule:"); |
|
90 |
int i = 0; |
|
91 |
for (String error : errors) { |
|
92 |
System.out.println("\t"+error); |
|
93 |
if (i >= 10) break; |
|
94 |
} |
|
95 |
if (errors.size() > 10) { |
|
96 |
try { |
|
97 |
File errorFile = new File(Toolbox.getTXMHOMEPATH(), "errors.txt"); |
|
98 |
IOUtils.write(errorFile, StringUtils.join(errors, "\t")); |
|
99 |
System.out.println("More errors, see "+errorFile.getAbsolutePath()); |
|
100 |
} catch (Exception e) { |
|
101 |
e.printStackTrace(); |
|
102 |
} |
|
103 |
} |
|
104 |
return false; |
|
105 |
} |
|
106 |
|
|
107 |
tmpFile.renameTo(xmlFile); |
|
108 |
if (tmpFile.exists()) { |
|
109 |
System.out.println("Could not replace original file with the result file. "+xmlFile+ " with "+tmpFile); |
|
110 |
return false; |
|
111 |
} |
|
112 |
} else { |
|
113 |
System.out.println("Fail to process "+xmlFile); |
|
114 |
return false; |
|
115 |
} |
|
116 |
} |
|
117 |
System.out.println(); // end of ConsoleProgressBar |
|
118 |
return true; |
|
119 |
} |
|
120 |
} |
|
0 | 121 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/XMLTXMFileRuledConversion.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core.conversion; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.IOException; |
|
5 |
import java.util.HashSet; |
|
6 |
import java.util.LinkedHashMap; |
|
7 |
import java.util.regex.Pattern; |
|
8 |
|
|
9 |
import javax.xml.stream.XMLStreamException; |
|
10 |
|
|
11 |
import org.txm.importer.StaxIdentityParser; |
|
12 |
|
|
13 |
public class XMLTXMFileRuledConversion extends StaxIdentityParser { |
|
14 |
|
|
15 |
protected LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
16 |
protected String oldType; |
|
17 |
protected String newType; |
|
18 |
|
|
19 |
public static final String DELETE = "supprimer"; |
|
20 |
public static final String COPY = "copier"; |
|
21 |
public static final String ABANDON = "abandon"; |
|
22 |
HashSet<String> noMatchValues = new HashSet<String>(); |
|
23 |
|
|
24 |
public XMLTXMFileRuledConversion(File infile, LinkedHashMap<Pattern, String> rules, String oldType, String newType, String mode) throws IOException, XMLStreamException { |
|
25 |
super(infile); |
|
26 |
this.rules = rules; |
|
27 |
this.oldType = oldType; |
|
28 |
this.newType = newType; |
|
29 |
|
|
30 |
this.mode = mode; |
|
31 |
|
|
32 |
if (!this.newType.startsWith("#")) this.newType = "#"+this.newType; |
|
33 |
if (!this.oldType.startsWith("#")) this.oldType = "#"+this.oldType; |
|
34 |
} |
|
35 |
|
|
36 |
boolean inW = false, inAna = false, inForm; |
|
37 |
LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>(); |
|
38 |
LinkedHashMap<String, String> anaResps = new LinkedHashMap<String, String>(); |
|
39 |
String typeName = null; |
|
40 |
String respName = null; |
|
41 |
String formValue, typeValue = null; |
|
42 |
private Object mode; |
|
43 |
|
|
44 |
@Override |
|
45 |
public void processStartElement() throws XMLStreamException, IOException { |
|
46 |
if (!inW) super.processStartElement(); // don't write W content |
|
47 |
|
|
48 |
if (localname.equals("w")) { |
|
49 |
inW = true; |
|
50 |
anaValues.clear(); |
|
51 |
anaResps.clear(); |
|
52 |
|
|
53 |
//initialize the new type to a empty value in case there is transformation rule |
|
54 |
anaValues.put(newType, ""); |
|
55 |
anaResps.put(newType, "#txm_recode"); |
|
56 |
} else if (localname.equals("ana")) { |
|
57 |
inAna = true; |
|
58 |
typeName = parser.getAttributeValue(null, "type"); |
|
59 |
respName = parser.getAttributeValue(null, "resp"); |
|
60 |
anaResps.put(typeName, respName); |
|
61 |
//if (typeName != null) typeName = typeName.substring(1); // remove # |
|
62 |
typeValue = ""; |
|
63 |
} else if (localname.equals("form")) { |
|
64 |
inForm = true; |
|
65 |
formValue = ""; |
|
66 |
} |
|
67 |
} |
|
68 |
|
|
69 |
@Override |
|
70 |
public void processCharacters() throws XMLStreamException { |
|
71 |
if (inW && inAna) typeValue+=parser.getText(); |
|
72 |
else if (inW && inForm) formValue+=parser.getText(); |
|
73 |
else super.processCharacters(); |
|
74 |
} |
|
75 |
|
|
76 |
@Override |
|
77 |
public void processEndElement() throws XMLStreamException { |
|
78 |
if (localname.equals("w")) { |
|
79 |
inW = false; |
|
80 |
|
|
81 |
// write W content |
|
82 |
try { |
|
83 |
// get the value to test |
|
84 |
String value = null; |
|
85 |
if (oldType.equals("word")) { |
|
86 |
value = formValue; |
|
87 |
} else { |
|
88 |
value = anaValues.get(oldType); |
|
89 |
} |
|
90 |
|
|
91 |
if (newType.equals("word")) { // update form property |
|
92 |
updateFormValueIfMatch(value); |
|
93 |
} else { // update another word property |
|
94 |
if (value != null) { |
|
95 |
updateAnaValuesIfMatch(value); |
|
96 |
} |
|
97 |
} |
|
98 |
|
|
99 |
// write the word element |
|
100 |
writer.writeStartElement("txm:form"); |
|
101 |
writer.writeCharacters(formValue); |
|
102 |
writer.writeEndElement(); |
|
103 |
|
|
104 |
for (String k : anaValues.keySet()) { |
|
105 |
String resp = anaResps.get(k); |
|
106 |
if (resp == null) resp = "#txm_recode"; |
|
107 |
|
|
108 |
writer.writeStartElement("txm:ana"); |
|
109 |
writer.writeAttribute("resp", resp); |
|
110 |
writer.writeAttribute("type", k); |
|
111 |
writer.writeCharacters(anaValues.get(k)); |
|
112 |
writer.writeEndElement(); |
|
113 |
} |
|
114 |
} catch (XMLStreamException e) { |
|
115 |
e.printStackTrace(); |
|
116 |
} |
|
117 |
} else if (localname.equals("ana")) { |
|
118 |
anaValues.put(typeName, typeValue); |
|
119 |
inAna = false; |
|
120 |
} else if (localname.equals("form")) { |
|
121 |
inForm = false; |
|
122 |
} |
|
123 |
|
|
124 |
if (!inW) super.processEndElement(); // don't write W content |
|
125 |
} |
|
126 |
|
|
127 |
protected void updateFormValueIfMatch(String value) { |
|
128 |
for (Pattern rule : rules.keySet()) { |
|
129 |
if (rule.matcher(value).matches()) { |
|
130 |
formValue = rules.get(rule); |
|
131 |
return; // ok stop |
|
132 |
} |
|
133 |
} |
|
134 |
|
|
135 |
noMatchValues.add(value); |
|
136 |
} |
|
137 |
|
|
138 |
protected void updateAnaValuesIfMatch(String value) { |
|
139 |
for (Pattern rule : rules.keySet()) { |
|
140 |
if (rule.matcher(value).matches()) { |
|
141 |
value = rules.get(rule); |
|
142 |
anaValues.put(newType, value); // do a replace if newType == oldType :-) |
|
143 |
anaResps.put(newType, "#txm_recode"); |
|
144 |
return; |
|
145 |
} |
|
146 |
} |
|
147 |
if (DELETE.equals(mode)) anaValues.put(newType, ""); // do a replace if newType == oldType :-) |
|
148 |
else if (ABANDON.equals(mode)) anaValues.put(newType, "ERROR("+value+")"); // do a replace if newType == oldType :-) |
|
149 |
|
|
150 |
noMatchValues.add(value); |
|
151 |
} |
|
152 |
|
|
153 |
public HashSet<String> getNoMatchValues() { |
|
154 |
return noMatchValues; |
|
155 |
} |
|
156 |
|
|
157 |
public static void main(String args[]) { |
|
158 |
try { |
|
159 |
File xmlFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test.xml"); |
|
160 |
File tmpFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test-o.xml"); |
|
161 |
String oldType = "type"; |
|
162 |
String newType = "type"; |
|
163 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
164 |
rules.put(Pattern.compile("w"), "WORD"); |
|
165 |
rules.put(Pattern.compile("x.+"), "XWORD"); |
|
166 |
rules.put(Pattern.compile("y"), "YWORD"); |
|
167 |
rules.put(Pattern.compile("y.*"), "YMULTIWORD"); |
|
168 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, ABANDON); |
|
169 |
System.out.println(converter.process(tmpFile)); |
|
170 |
} catch (Exception e) { |
|
171 |
e.printStackTrace(); |
|
172 |
} |
|
173 |
} |
|
174 |
} |
|
0 | 175 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationSyMoGIHWriter.java (revision 850) | ||
---|---|---|
1 |
package org.txm.annotation.kr.core; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.FileWriter; |
|
5 |
import java.io.IOException; |
|
6 |
import java.net.MalformedURLException; |
|
7 |
import java.util.ArrayList; |
|
8 |
import java.util.HashMap; |
|
9 |
import java.util.HashSet; |
|
10 |
import java.util.List; |
|
11 |
|
|
12 |
import javax.xml.stream.XMLInputFactory; |
|
13 |
import javax.xml.stream.XMLOutputFactory; |
|
14 |
import javax.xml.stream.XMLStreamException; |
|
15 |
import javax.xml.stream.XMLStreamWriter; |
|
16 |
|
|
17 |
import org.txm.annotation.kr.core.repository.AnnotationType; |
|
18 |
import org.txm.importer.StaxIdentityParser; |
|
19 |
|
|
20 |
/** |
|
21 |
* The Class AnnotationSyMoGIHWriter. |
|
22 |
* |
|
23 |
* @author sgedzelman, mdecorde |
|
24 |
* |
|
25 |
* copy a XML-TXM file without annotations elements |
|
26 |
* and creates annotations XML-TEI-SymoGIH annotation files for each annotation author |
|
27 |
* |
|
28 |
*/ |
|
29 |
public class AnnotationSyMoGIHWriter extends StaxIdentityParser { |
|
30 |
|
|
31 |
File xmlStandOffDirectory; |
|
32 |
boolean debug = false; |
|
33 |
|
|
34 |
String currentRef ; |
|
35 |
String currentAuthor ; |
|
36 |
String currentDate; |
|
37 |
String currentStartPos ; |
|
38 |
String currentEndPos ; |
|
39 |
//read xmlFile, to find annotations and update/write to xmlstandofffile |
|
40 |
//order annotations by annotator |
|
41 |
////// order annotations by date |
|
42 |
HashSet<String> types = new HashSet<String>(); |
|
43 |
ArrayList<String> positions; |
|
44 |
HashMap<String, ArrayList<String>> annotationsPositions; |
|
45 |
XMLStreamWriter currentWriter; |
|
46 |
XMLStreamWriter standoffWriter; |
|
47 |
String currentType; |
|
48 |
boolean startAnnotation = false; |
|
49 |
private String filename; |
|
50 |
|
|
51 |
// author -> date -> annotation_values |
|
52 |
HashMap<String, HashMap<String, ArrayList<String[]>>> allannotations = new HashMap<String, HashMap<String, ArrayList<String[]>>>(); |
|
53 |
HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>> allannotationspositions = new HashMap<String, HashMap<String, ArrayList<ArrayList<String>>>>(); |
|
54 |
private String textid; |
|
55 |
|
|
56 |
/**
 * Prepares the copy of an XML-TXM file and registers the annotation
 * element names to look for.
 *
 * @param textid the text identifier
 * @param xmlFile the XML-TXM file to read
 * @param xmlStandOffDirectory the directory of the stand-off files
 * @param types the annotation types; their ids are the names of the annotation elements
 * @param debug the debug flag
 * @throws XMLStreamException
 * @throws IOException
 */
public AnnotationSyMoGIHWriter(String textid, File xmlFile, File xmlStandOffDirectory, List<AnnotationType> types, boolean debug) throws IOException, XMLStreamException {
	super(xmlFile.toURI().toURL()); // init reader and writer

	this.debug = debug;
	this.xmlStandOffDirectory = xmlStandOffDirectory;
	this.filename = xmlFile.getName();
	this.textid = textid;

	factory = XMLInputFactory.newInstance();
	annotationsPositions = new HashMap<String, ArrayList<String>>();

	// the annotation elements are recognized by the id of their type
	for (AnnotationType annotationType : types) {
		this.types.add(annotationType.getId());
	}
}
|
81 |
|
|
82 |
/*<TEI xmlns="http://www.tei-c.org/ns/1.0"> |
|
83 |
<teiHeader> |
|
84 |
<fileDesc> |
|
85 |
<titleStmt> |
|
86 |
<title>Title</title> |
|
87 |
</titleStmt> |
|
88 |
<publicationStmt> |
|
89 |
<p>Publication Information</p> |
|
90 |
</publicationStmt> |
|
91 |
<sourceDesc> |
|
92 |
<p>Ce document permet l'annotation sémantique de tous les textes concernant l'association avec des unités de connaissance</p> |
|
93 |
</sourceDesc> |
|
94 |
</fileDesc> |
|
95 |
</teiHeader> |
|
96 |
<text> |
|
97 |
<body> |
|
98 |
<div> |
|
99 |
<div> |
|
100 |
<!-- La date dans le header indique la date d'annotation --> |
|
101 |
<head> |
|
102 |
<date type="annotation_date" when="2016-06-16"/> |
|
103 |
</head> |
|
104 |
<span type="identification d'entités nommées" ana="CoAc56389" |
|
105 |
target="#w_article_baip_1254-0714_1850_num_01_005_974_tei_2152 |
|
106 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2153 |
|
107 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2154 |
|
108 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2155 |
|
109 |
#w_article_baip_1254-0714_1850_num_01_005_974_tei_2156" /> |
|
110 |
</div> |
|
111 |
</div> |
|
112 |
</body> |
|
113 |
</text> |
|
114 |
</TEI>*/ |
|
115 |
|
|
116 |
protected void processStartElement() throws XMLStreamException, IOException { |
|
117 |
//<coac author="gazelledess" ref="CoAc397" start="5" end="5"> |
|
118 |
|
|
119 |
/*if(localname.startsWith("actr")){ |
|
120 |
System.out.println("Check existence of actr in Corpus !!!! "+localname); |
|
121 |
}*/ |
|
122 |
boolean foundAnnot = false; |
|
123 |
|
|
124 |
|
|
125 |
if (types.contains(localname) && parser.getPrefix().equals("txm")) { // don't write txm annotation elements |
|
126 |
//System.out.println(" START "+ localname); |
|
127 |
foundAnnot = true; |
|
128 |
currentType = localname; |
|
129 |
//<txm:actr author="gazelledess" ref="PhileasFogg" date="2016-09-05" start="56" end="57"> |
|
130 |
currentAuthor = parser.getAttributeValue(null, "author"); |
|
131 |
currentRef = parser.getAttributeValue(null, "ref"); |
|
132 |
currentStartPos = parser.getAttributeValue(null, "start"); |
|
133 |
currentEndPos = parser.getAttributeValue(null, "end"); |
|
134 |
currentDate = parser.getAttributeValue(null, "date"); |
|
135 |
//annotation is here |
|
136 |
startAnnotation = true; |
|
137 |
positions = new ArrayList<String>(); |
|
138 |
annotationsPositions.put(currentType, positions); |
|
139 |
|
|
140 |
// initialize allannotations |
|
141 |
if (!allannotations.containsKey(currentAuthor)) { |
|
142 |
allannotations.put(currentAuthor, new HashMap<String, ArrayList<String[]>>()); |
|
143 |
allannotationspositions.put(currentAuthor, new HashMap<String, ArrayList<ArrayList<String>>>()); |
|
144 |
} |
|
145 |
HashMap<String, ArrayList<String[]>> authorsAnnotation = allannotations.get(currentAuthor); |
|
146 |
HashMap<String, ArrayList<ArrayList<String>>> authorsAnnotationPositions = allannotationspositions.get(currentAuthor); |
|
147 |
if (!authorsAnnotation.containsKey(currentDate)) { |
|
148 |
authorsAnnotation.put(currentDate, new ArrayList<String[]>()); |
|
149 |
authorsAnnotationPositions.put(currentDate, new ArrayList<ArrayList<String>>()); |
|
150 |
} |
|
151 |
|
|
152 |
storeAnnotation(); |
|
153 |
} |
|
154 |
|
|
155 |
if (!foundAnnot) { |
|
156 |
super.processStartElement(); /// continue writing in file all elements, except the tags that are now in stand-off files |
|
157 |
|
|
158 |
// get words ids of the current annotations |
|
159 |
if (localname.equals("w") && startAnnotation) { |
|
160 |
String id = parser.getAttributeValue(null, "id"); |
|
161 |
for (String typeIn : annotationsPositions.keySet()) { |
|
162 |
positions = annotationsPositions.get(typeIn); |
|
163 |
positions.add(id); |
|
164 |
//System.out.println("Positions of w id="+posW+" for ["+typeIn+"] "); |
|
165 |
} |
|
166 |
} |
|
167 |
} |
|
168 |
} |
|
169 |
|
|
170 |
|
|
171 |
/** |
|
172 |
* ends the current author stand-off file |
|
173 |
* @param currentWriter |
|
174 |
*/ |
|
175 |
private void writeEndStandOffFile(XMLStreamWriter currentWriter){ |
|
176 |
//System.out.println("writeEndStandOffFile ..."); |
|
177 |
|
|
178 |
try { |
|
179 |
currentWriter.writeEndElement(); |
|
180 |
currentWriter.writeEndDocument(); |
|
181 |
|
|
182 |
currentWriter.flush(); |
|
183 |
currentWriter.close(); |
|
184 |
} catch (XMLStreamException e) { |
|
185 |
e.printStackTrace(); |
|
186 |
} |
|
187 |
|
|
188 |
} |
|
189 |
|
|
190 |
/** |
|
191 |
* Create the stand-off file for one author |
|
192 |
* @param file |
|
193 |
* @return |
|
194 |
*/ |
|
195 |
private XMLStreamWriter writeStartStandOffFile(File file){ |
|
196 |
//System.out.println("writeStartStandOffFile ..."); |
|
197 |
|
|
198 |
String ns = "http://www.tei-c.org/ns/1.0"; |
|
199 |
XMLOutputFactory output = XMLOutputFactory.newInstance(); |
|
200 |
XMLStreamWriter writer = null ; |
|
201 |
try { |
|
202 |
writer = output.createXMLStreamWriter(new FileWriter(file)); |
|
203 |
writer.writeStartDocument(); |
|
204 |
writer.setPrefix("tei", ns); |
|
205 |
writer.setDefaultNamespace(ns); |
|
206 |
|
|
207 |
writer.writeStartElement("TEI"); |
|
208 |
|
|
209 |
writer.writeStartElement("teiHeader"); |
|
210 |
writer.writeStartElement("fileDesc"); |
|
211 |
|
|
212 |
writer.writeStartElement("titleStmt"); |
|
213 |
writer.writeStartElement("title"); |
|
214 |
writer.writeCharacters(textid); |
|
215 |
writer.writeEndElement(); // title |
|
216 |
writer.writeEndElement(); // titleStmt |
|
217 |
|
|
218 |
writer.writeStartElement("publicationStmt"); |
|
219 |
writer.writeStartElement("p"); |
|
220 |
writer.writeCharacters("PUBLICATION INFO à renseigner"); |
|
221 |
writer.writeEndElement(); // p |
|
222 |
writer.writeEndElement(); // publicationStmt |
|
223 |
|
|
224 |
writer.writeStartElement("sourceDesc"); |
|
225 |
writer.writeStartElement("p"); |
|
226 |
writer.writeCharacters("Ce document permet l'annotation sémantique de tous les textes, par auteur"); |
|
227 |
writer.writeEndElement(); // p |
|
228 |
writer.writeEndElement(); // sourceDesc |
|
229 |
|
|
230 |
writer.writeEndElement(); // </fileDesc> |
|
231 |
writer.writeStartElement("encodingDesc"); |
|
232 |
writer.writeStartElement("projectDesc"); |
|
233 |
writer.writeStartElement("p"); |
|
234 |
writer.writeCharacters("Annotations created by "+currentAuthor+", for the use in Symogih XML platform"); |
|
235 |
writer.writeEndElement(); // p |
|
236 |
writer.writeEndElement(); // </projectDesc> |
|
237 |
writer.writeEndElement(); // </encodingDesc> |
|
238 |
writer.writeEndElement(); // </teiHeader> |
|
239 |
|
|
240 |
|
|
241 |
writer.writeStartElement("text"); |
|
242 |
writer.writeStartElement("body"); |
|
243 |
writer.writeCharacters("\n"); |
|
244 |
writer.writeStartElement("div"); |
|
245 |
writer.writeCharacters("\n"); |
|
246 |
} catch (XMLStreamException e) { |
|
247 |
// TODO Auto-generated catch block |
|
248 |
e.printStackTrace(); |
|
249 |
} catch (IOException e) { |
|
250 |
// TODO Auto-generated catch block |
|
251 |
e.printStackTrace(); |
|
252 |
} |
|
253 |
return writer; |
|
254 |
} |
|
255 |
|
|
256 |
private void storeAnnotation() { |
|
257 |
allannotations.get(currentAuthor).get(currentDate).add(new String[]{currentDate, currentRef, currentType}); |
|
258 |
} |
|
259 |
|
|
260 |
private void storeAnnotationPositions() { |
|
261 |
allannotationspositions.get(currentAuthor).get(currentDate).add(positions); |
|
262 |
} |
|
263 |
|
|
264 |
/** |
|
265 |
* write stand-off annotation |
|
266 |
*/ |
|
267 |
private void writeStartAnnotationToStandoffFile(String[] data, ArrayList<String> positions) { |
|
268 |
//System.out.println("writeStartAnnotationToStandoffFile ..."); |
|
269 |
try { |
|
270 |
currentWriter.writeStartElement("span"); |
|
271 |
currentWriter.writeAttribute("type","named_entities_identifications"); |
|
272 |
currentWriter.writeAttribute("ana", data[1]); |
|
273 |
StringBuffer listWids = new StringBuffer(); |
|
274 |
for (String posW : positions) { |
|
275 |
listWids.append("#"+posW+" "); |
|
276 |
} |
|
277 |
currentWriter.writeAttribute("target", listWids.toString().trim()); |
|
278 |
currentWriter.writeComment("type="+data[2]); |
|
279 |
currentWriter.writeEndElement(); // span |
|
280 |
currentWriter.writeCharacters("\n"); |
|
281 |
|
|
282 |
} catch (XMLStreamException e) { |
|
283 |
// TODO Auto-generated catch block |
|
284 |
e.printStackTrace(); |
|
285 |
} |
|
286 |
} |
|
287 |
|
|
288 |
protected void processEndElement() throws XMLStreamException { |
|
289 |
boolean foundAnnot = false; |
|
290 |
if (types.contains(localname) && parser.getPrefix().equals("txm")) { // skip annotation end element |
|
291 |
//System.out.println(" END "+ localname); |
|
292 |
foundAnnot = true; |
|
293 |
//annotation ends here |
|
294 |
storeAnnotationPositions(); |
|
295 |
|
|
296 |
if (annotationsPositions.containsKey(localname)) { |
|
297 |
annotationsPositions.remove(localname); |
Formats disponibles : Unified diff