Révision 2396
tmp/org.txm.ca.rcp/.classpath (revision 2396) | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 | 2 |
<classpath> |
3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
5 |
<accessrules> |
|
6 |
<accessrule kind="accessible" pattern="**"/> |
|
7 |
</accessrules> |
|
8 |
</classpathentry> |
|
9 |
<classpathentry kind="src" path="src"/> |
|
10 |
<classpathentry kind="output" path="bin"/> |
|
3 |
<classpathentry kind="con" |
|
4 |
path="org.eclipse.jdt.launching.JRE_CONTAINER" /> |
|
5 |
<classpathentry kind="con" |
|
6 |
path="org.eclipse.pde.core.requiredPlugins"> |
|
7 |
<accessrules> |
|
8 |
<accessrule kind="accessible" pattern="**" /> |
|
9 |
</accessrules> |
|
10 |
</classpathentry> |
|
11 |
<classpathentry kind="src" path="src" /> |
|
12 |
<classpathentry kind="output" path="bin" /> |
|
11 | 13 |
</classpath> |
tmp/org.txm.ca.core/.classpath (revision 2396) | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 | 2 |
<classpath> |
3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
5 |
<accessrules> |
|
6 |
<accessrule kind="accessible" pattern="**"/> |
|
7 |
</accessrules> |
|
8 |
</classpathentry> |
|
9 |
<classpathentry kind="src" path="src"/> |
|
10 |
<classpathentry kind="output" path="bin"/> |
|
3 |
<classpathentry kind="con" |
|
4 |
path="org.eclipse.jdt.launching.JRE_CONTAINER" /> |
|
5 |
<classpathentry kind="con" |
|
6 |
path="org.eclipse.pde.core.requiredPlugins"> |
|
7 |
<accessrules> |
|
8 |
<accessrule kind="accessible" pattern="**" /> |
|
9 |
</accessrules> |
|
10 |
</classpathentry> |
|
11 |
<classpathentry kind="src" path="src" /> |
|
12 |
<classpathentry kind="output" path="bin" /> |
|
11 | 13 |
</classpath> |
tmp/org.txm.ca.core/src/org/txm/ca/core/functions/package.html (revision 2396) | ||
---|---|---|
1 | 1 |
<html> |
2 | 2 |
<body> |
3 |
<p>Correspondence analysis.</p> |
|
3 |
<p>Correspondence analysis.</p>
|
|
4 | 4 |
</body> |
5 | 5 |
</html> |
tmp/org.txm.annotation.kr.core/.classpath (revision 2396) | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 | 2 |
<classpath> |
3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
5 |
<accessrules> |
|
6 |
<accessrule kind="accessible" pattern="**"/> |
|
7 |
</accessrules> |
|
8 |
</classpathentry> |
|
9 |
<classpathentry kind="src" path="src"/> |
|
10 |
<classpathentry kind="lib" path="libs/hsqldb.jar"/> |
|
11 |
<classpathentry kind="lib" path="libs/postgresql-9.4.1207.jre6.jar"/> |
|
12 |
<classpathentry kind="lib" path="libs/sqlite-jdbc-3.8.11.2.jar"/> |
|
13 |
<classpathentry kind="output" path="bin"/> |
|
3 |
<classpathentry kind="con" |
|
4 |
path="org.eclipse.jdt.launching.JRE_CONTAINER" /> |
|
5 |
<classpathentry kind="con" |
|
6 |
path="org.eclipse.pde.core.requiredPlugins"> |
|
7 |
<accessrules> |
|
8 |
<accessrule kind="accessible" pattern="**" /> |
|
9 |
</accessrules> |
|
10 |
</classpathentry> |
|
11 |
<classpathentry kind="src" path="src" /> |
|
12 |
<classpathentry kind="lib" path="libs/hsqldb.jar" /> |
|
13 |
<classpathentry kind="lib" |
|
14 |
path="libs/postgresql-9.4.1207.jre6.jar" /> |
|
15 |
<classpathentry kind="lib" |
|
16 |
path="libs/sqlite-jdbc-3.8.11.2.jar" /> |
|
17 |
<classpathentry kind="output" path="bin" /> |
|
14 | 18 |
</classpath> |
tmp/org.txm.annotation.kr.core/META-INF/persistence.xml (revision 2396) | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 |
<persistence version="2.1" xmlns="http://xmlns.jcp.org/xml/ns/persistence" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/persistence http://xmlns.jcp.org/xml/ns/persistence/persistence_2_1.xsd"> |
|
3 |
<persistence-unit name="HSQLKRPERSISTENCE" transaction-type="RESOURCE_LOCAL"> |
|
2 |
<persistence version="2.1" |
|
3 |
xmlns="http://xmlns.jcp.org/xml/ns/persistence" |
|
4 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
5 |
xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/persistence http://xmlns.jcp.org/xml/ns/persistence/persistence_2_1.xsd"> |
|
6 |
<persistence-unit name="HSQLKRPERSISTENCE" |
|
7 |
transaction-type="RESOURCE_LOCAL"> |
|
4 | 8 |
<provider>org.eclipse.persistence.jpa.PersistenceProvider</provider> |
5 |
|
|
9 |
|
|
6 | 10 |
<class>org.txm.annotation.kr.core.Annotation</class> |
7 | 11 |
<class>org.txm.annotation.kr.core.repository.AnnotationType</class> |
8 | 12 |
<class>org.txm.annotation.kr.core.repository.TypedValue</class> |
9 |
<!-- |
|
10 |
<class>org.txm.functions.dictionary_jpa.EntryId</class> |
|
11 |
<class>org.txm.functions.dictionary_jpa.EntryProperty</class> |
|
12 |
<class>org.txm.functions.dictionary_jpa.EntryPropertyType</class> |
|
13 |
--> |
|
13 |
<!-- <class>org.txm.functions.dictionary_jpa.EntryId</class> <class>org.txm.functions.dictionary_jpa.EntryProperty</class> |
|
14 |
<class>org.txm.functions.dictionary_jpa.EntryPropertyType</class> --> |
|
14 | 15 |
<properties> |
15 |
<property name="javax.persistence.jdbc.driver" value="org.hsqldb.jdbcDriver"/> |
|
16 |
<property name="javax.persistence.jdbc.url" value="jdbc:hsqldb:file:txmdb/db;shutdown=true;hsqldb.write_delay=false;hsqldb.lock_file=false;hsqldb.applog=0;hsqldb.sqllog=0"/> |
|
17 |
<property name="javax.persistence.jdbc.user" value="SA"/> |
|
18 |
|
|
19 |
<property name="eclipselink.logging.level" value="OFF"/> |
|
20 |
<!-- |
|
21 |
<property name="eclipselink.jdbc.read-connections.min" value="1" /> |
|
22 |
<property name="eclipselink.jdbc.write-connections.min" value="1" /> |
|
23 |
<property name="eclipselink.jdbc.batch-writing" value="JDBC" /> |
|
24 |
--> |
|
16 |
<property name="javax.persistence.jdbc.driver" |
|
17 |
value="org.hsqldb.jdbcDriver" /> |
|
18 |
<property name="javax.persistence.jdbc.url" |
|
19 |
value="jdbc:hsqldb:file:txmdb/db;shutdown=true;hsqldb.write_delay=false;hsqldb.lock_file=false;hsqldb.applog=0;hsqldb.sqllog=0" /> |
|
20 |
<property name="javax.persistence.jdbc.user" value="SA" /> |
|
25 | 21 |
|
26 |
<!-- Logging |
|
27 |
<property name="eclipselink.logging.file" value="output.log"/> |
|
28 |
<property name="eclipselink.logging.logger" value="JavaLogger"/> |
|
22 |
<property name="eclipselink.logging.level" value="OFF" /> |
|
23 |
<!-- <property name="eclipselink.jdbc.read-connections.min" value="1" |
|
24 |
/> <property name="eclipselink.jdbc.write-connections.min" value="1" /> <property |
|
25 |
name="eclipselink.jdbc.batch-writing" value="JDBC" /> --> |
|
29 | 26 |
|
30 |
<property name="eclipselink.logging.parameters" value="false"/>
|
|
31 |
<property name="eclipselink.logging.level" value="FINE" />
|
|
32 |
<property name="eclipselink.logging.timestamp" value="false" />
|
|
33 |
<property name="eclipselink.logging.session" value="false" />
|
|
34 |
<property name="eclipselink.logging.thread" value="false" />
|
|
35 |
--> |
|
36 |
|
|
27 |
<!-- Logging <property name="eclipselink.logging.file" value="output.log"/>
|
|
28 |
<property name="eclipselink.logging.logger" value="JavaLogger"/> <property
|
|
29 |
name="eclipselink.logging.parameters" value="false"/> <property name="eclipselink.logging.level"
|
|
30 |
value="FINE" /> <property name="eclipselink.logging.timestamp" value="false"
|
|
31 |
/> <property name="eclipselink.logging.session" value="false" /> <property
|
|
32 |
name="eclipselink.logging.thread" value="false" /> -->
|
|
33 |
|
|
37 | 34 |
<!-- <property name="eclipselink.ddl-generation" value="drop-and-create-tables"/> --> |
38 | 35 |
<!-- <property name="eclipselink.ddl-generation.output-mode" value="database"/> --> |
39 | 36 |
</properties> |
40 |
|
|
37 |
|
|
41 | 38 |
</persistence-unit> |
42 | 39 |
</persistence> |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/DeleteAnnotation.java (revision 2396) | ||
---|---|---|
5 | 5 |
import org.junit.Test; |
6 | 6 |
|
7 | 7 |
public class DeleteAnnotation { |
8 |
|
|
8 |
|
|
9 | 9 |
@Test |
10 | 10 |
public void test() { |
11 | 11 |
fail("Not yet implemented"); |
12 | 12 |
} |
13 |
|
|
13 |
|
|
14 | 14 |
} |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/UpdateAnnotation.java (revision 2396) | ||
---|---|---|
5 | 5 |
import org.junit.Test; |
6 | 6 |
|
7 | 7 |
public class UpdateAnnotation { |
8 |
|
|
8 |
|
|
9 | 9 |
@Test |
10 | 10 |
public void test() { |
11 | 11 |
fail("Not yet implemented"); |
12 | 12 |
} |
13 |
|
|
13 |
|
|
14 | 14 |
} |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/CreateAnnotation.java (revision 2396) | ||
---|---|---|
13 | 13 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
14 | 14 |
|
15 | 15 |
public class CreateAnnotation { |
16 |
|
|
16 |
|
|
17 | 17 |
@Test |
18 | 18 |
public void test() throws CqiClientException, InvalidCqpIdException { |
19 | 19 |
if (!Toolbox.isInitialized()) fail("Toolbox not initialized."); |
... | ... | |
21 | 21 |
if (corpus == null) fail("Corpus Voeux not loaded."); |
22 | 22 |
|
23 | 23 |
HashMap<String, Object> properties = TemporaryAnnotationManager.getInitialisationProperties(this.getClass(), corpus); |
24 |
properties.put("eclipselink.persistencexml", System.getProperty("user.home")+"/workspace442/org.txm.core/META-INF/persistence.xml");
|
|
24 |
properties.put("eclipselink.persistencexml", System.getProperty("user.home") + "/workspace442/org.txm.core/META-INF/persistence.xml");
|
|
25 | 25 |
|
26 | 26 |
TemporaryAnnotationManager tam = new TemporaryAnnotationManager(corpus, properties); |
27 | 27 |
System.out.println(tam); |
28 | 28 |
} |
29 |
|
|
29 |
|
|
30 | 30 |
} |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/AllTests.java (revision 2396) | ||
---|---|---|
9 | 9 |
@SuiteClasses({ StartToolbox.class, CreateAnnotation.class, DeleteAnnotation.class, |
10 | 10 |
UpdateAnnotation.class }) |
11 | 11 |
public class AllTests { |
12 |
|
|
12 |
|
|
13 | 13 |
} |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationComparator.java (revision 2396) | ||
---|---|---|
6 | 6 |
import java.util.StringTokenizer; |
7 | 7 |
|
8 | 8 |
public class AnnotationComparator implements Comparator<Annotation> { |
9 |
|
|
9 |
|
|
10 | 10 |
public int compare(Annotation a1, Annotation a2) { |
11 |
// comparer e1 et e2
|
|
11 |
// comparer e1 et e2
|
|
12 | 12 |
|
13 |
DateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd");
|
|
14 |
//System.out.println("Date 1 : "+a1.getDate() + " => "+dateformat.format(a1.getDate())); |
|
15 |
//System.out.println("Date 2 : "+a2.getDate() + " => "+dateformat.format(a2.getDate())); |
|
13 |
DateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd"); |
|
14 |
// System.out.println("Date 1 : "+a1.getDate() + " => "+dateformat.format(a1.getDate()));
|
|
15 |
// System.out.println("Date 2 : "+a2.getDate() + " => "+dateformat.format(a2.getDate()));
|
|
16 | 16 |
StringTokenizer tokenizer1 = new StringTokenizer(dateformat.format(a1.getDate()), "-"); |
17 | 17 |
StringTokenizer tokenizer2 = new StringTokenizer(dateformat.format(a2.getDate()), "-"); |
18 | 18 |
|
19 |
for(int i = 0 ; i < 2 && tokenizer1.hasMoreTokens() && tokenizer2.hasMoreTokens() ; ++i) {
|
|
19 |
for (int i = 0; i < 2 && tokenizer1.hasMoreTokens() && tokenizer2.hasMoreTokens(); ++i) {
|
|
20 | 20 |
String token1 = tokenizer1.nextToken(); |
21 | 21 |
String token2 = tokenizer2.nextToken(); |
22 | 22 |
int valint1 = new Integer(token1).intValue(); |
23 | 23 |
int valint2 = new Integer(token2).intValue(); |
24 |
//System.out.println(valint1+" | "+valint2); |
|
25 |
if (valint1<valint2){
|
|
24 |
// System.out.println(valint1+" | "+valint2);
|
|
25 |
if (valint1 < valint2) {
|
|
26 | 26 |
return -1; |
27 |
}else { |
|
28 |
if(valint1>valint2){ |
|
27 |
} |
|
28 |
else { |
|
29 |
if (valint1 > valint2) { |
|
29 | 30 |
return 1; |
30 |
}else { |
|
31 |
//System.out.println("idem ["+i+"]"); |
|
32 | 31 |
} |
32 |
else { |
|
33 |
// System.out.println("idem ["+i+"]"); |
|
34 |
} |
|
33 | 35 |
|
34 | 36 |
} |
35 | 37 |
} |
36 | 38 |
|
37 | 39 |
return 0; |
38 |
|
|
39 |
}
|
|
40 |
|
|
41 |
} |
|
40 | 42 |
|
41 | 43 |
public static void main(String[] args) { |
42 | 44 |
AnnotationComparator comp = new AnnotationComparator(); |
43 | 45 |
Annotation a1 = new Annotation("truc", "bidule", 3, 10); |
44 | 46 |
Annotation a2 = new Annotation("truc", "bidule", 6, 8); |
45 | 47 |
int ret = comp.compare(a1, a2); |
46 |
System.out.println("Le retour est : "+ret);
|
|
48 |
System.out.println("Le retour est : " + ret);
|
|
47 | 49 |
} |
48 | 50 |
} |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/DatabasePersistenceManager.java (revision 2396) | ||
---|---|---|
5 | 5 |
import javax.persistence.EntityManager; |
6 | 6 |
|
7 | 7 |
public class DatabasePersistenceManager { |
8 |
|
|
9 |
//Object can be Corpus or KnowledgeRepository |
|
8 |
|
|
9 |
// Object can be Corpus or KnowledgeRepository
|
|
10 | 10 |
protected HashMap<Object, EntityManager> managers; |
11 |
|
|
11 | 12 |
public static final String PERSISTENCE_UNIT_NAME = "HSQLKRPERSISTENCE"; |
13 |
|
|
12 | 14 |
public static String ACCESS_SQL = "sql"; |
15 |
|
|
13 | 16 |
public static String ACCESS_FILE = "file"; |
17 |
|
|
14 | 18 |
public static String ACCESS_SPARQL = "sparql"; |
15 |
|
|
19 |
|
|
16 | 20 |
/** |
17 | 21 |
* Instantiates a new database manager. |
18 | 22 |
*/ |
19 | 23 |
public DatabasePersistenceManager() { |
20 |
managers = new HashMap<Object, EntityManager>() ;
|
|
24 |
managers = new HashMap<Object, EntityManager>(); |
|
21 | 25 |
} |
22 |
|
|
26 |
|
|
23 | 27 |
/** |
24 | 28 |
* The Object can be a Corpus or a KnowledgeRepository |
29 |
* |
|
25 | 30 |
* @param obj |
26 | 31 |
* @return |
27 | 32 |
*/ |
28 |
public EntityManager getJPAEntityManager(Object obj){ |
|
33 |
public EntityManager getJPAEntityManager(Object obj) {
|
|
29 | 34 |
if (managers.containsKey(obj)) { |
30 | 35 |
return managers.get(obj); |
31 | 36 |
} |
32 | 37 |
return null; |
33 | 38 |
} |
34 |
|
|
39 |
|
|
35 | 40 |
public void closeManager(Object key) { |
36 | 41 |
if (!managers.keySet().contains(key)) return; |
37 |
|
|
42 |
|
|
38 | 43 |
managers.get(key).close(); |
39 | 44 |
managers.remove(key); |
40 | 45 |
} |
41 |
|
|
46 |
|
|
42 | 47 |
public void closeAllManagers() { |
43 | 48 |
for (Object key : managers.keySet()) { |
44 | 49 |
EntityManager m = managers.get(key); |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationManager.java (revision 2396) | ||
---|---|---|
25 | 25 |
* |
26 | 26 |
*/ |
27 | 27 |
public class AnnotationManager { |
28 |
|
|
28 |
|
|
29 | 29 |
MainCorpus corpus; |
30 |
|
|
30 | 31 |
TemporaryAnnotationManager tempManager; |
32 |
|
|
31 | 33 |
CQPAnnotationManager cqpManager; |
34 |
|
|
32 | 35 |
boolean dirty = false; |
33 |
|
|
34 |
public AnnotationManager(MainCorpus mainCorpus){ |
|
36 |
|
|
37 |
public AnnotationManager(MainCorpus mainCorpus) {
|
|
35 | 38 |
this.corpus = mainCorpus; |
36 | 39 |
} |
37 |
|
|
38 |
public TemporaryAnnotationManager getTemporaryManager(){ |
|
40 |
|
|
41 |
public TemporaryAnnotationManager getTemporaryManager() {
|
|
39 | 42 |
return tempManager; |
40 | 43 |
} |
41 | 44 |
|
42 |
public CQPAnnotationManager getCQPManager(){ |
|
45 |
public CQPAnnotationManager getCQPManager() {
|
|
43 | 46 |
return cqpManager; |
44 | 47 |
} |
45 | 48 |
|
... | ... | |
56 | 59 |
monitor.beginTask(KRAnnotationCoreMessages.savingAnnotations, annots.size()); |
57 | 60 |
monitor.setTaskName("writing annotations in XML-TXM files"); |
58 | 61 |
} |
59 |
|
|
62 |
|
|
60 | 63 |
AnnotationWriter writer = new AnnotationWriter(corpus); |
61 | 64 |
if (writer.writeAnnotations(annots, monitor)) { |
62 | 65 |
Log.info(KRAnnotationCoreMessages.annotationSuccesfullyWritten); |
... | ... | |
66 | 69 |
} |
67 | 70 |
return false; |
68 | 71 |
} |
69 |
|
|
70 |
public boolean exportAnnotationsToSyMoGIH(File resultZipFile) throws Exception{ |
|
72 |
|
|
73 |
public boolean exportAnnotationsToSyMoGIH(File resultZipFile) throws Exception {
|
|
71 | 74 |
AnnotationWriter writer = new AnnotationWriter(corpus); |
72 | 75 |
|
73 | 76 |
if (writer.writeAnnotationsInStandoff(resultZipFile)) { |
... | ... | |
77 | 80 |
return false; |
78 | 81 |
} |
79 | 82 |
|
80 |
//TODO: not ended? |
|
83 |
// TODO: not ended?
|
|
81 | 84 |
/** |
82 | 85 |
* Deletes the annotations stored in the temporary annotation manager |
86 |
* |
|
83 | 87 |
* @param type |
84 | 88 |
* @param job |
85 | 89 |
* @return |
... | ... | |
91 | 95 |
try { |
92 | 96 |
temporaryAnnotations = tempManager.getAnnotations(type); |
93 | 97 |
tempManager.getEntityManager().getTransaction().begin(); |
94 |
for (Annotation a : temporaryAnnotations){ |
|
98 |
for (Annotation a : temporaryAnnotations) {
|
|
95 | 99 |
if (job != null && job.isCanceled()) { |
96 | 100 |
System.out.println("Delete annotation canceled."); |
97 | 101 |
return false; |
... | ... | |
109 | 113 |
} |
110 | 114 |
String value = cqpManager.getCQPAnnotationValue(a.getStart(), a.getEnd(), type); |
111 | 115 |
if (value != null) { |
112 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), a.getStart(), a.getEnd()); |
|
113 |
} else { |
|
116 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), a.getStart(), a.getEnd()); |
|
117 |
} |
|
118 |
else { |
|
114 | 119 |
tempManager.deleteAnnotationNoCommit(type, a.getStart(), a.getEnd()); |
115 | 120 |
} |
116 | 121 |
} |
117 | 122 |
dirty = true; |
118 | 123 |
tempManager.getEntityManager().getTransaction().commit(); |
119 |
} catch(Exception e) { |
|
124 |
} |
|
125 |
catch (Exception e) { |
|
120 | 126 |
e.printStackTrace(); |
121 | 127 |
return false; |
122 | 128 |
} |
... | ... | |
131 | 137 |
int start, end; |
132 | 138 |
if (match.getTarget() >= 0) { |
133 | 139 |
start = end = match.getTarget(); |
134 |
} else { |
|
140 |
} |
|
141 |
else { |
|
135 | 142 |
start = match.getStart(); |
136 | 143 |
end = match.getEnd(); |
137 | 144 |
if (type.getEffect().equals(AnnotationEffect.TOKEN)) { |
... | ... | |
145 | 152 |
} |
146 | 153 |
|
147 | 154 |
String value = cqpManager.getCQPAnnotationValue(start, end, type); |
148 |
|
|
155 |
|
|
149 | 156 |
if (value != null) { |
150 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), start, end); |
|
151 |
} else { |
|
157 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), start, end); |
|
158 |
} |
|
159 |
else { |
|
152 | 160 |
tempManager.deleteAnnotationNoCommit(type, start, end); |
153 | 161 |
} |
154 | 162 |
} |
155 | 163 |
dirty = true; |
156 | 164 |
tempManager.getEntityManager().getTransaction().commit(); |
157 |
} catch (Exception e) { |
|
165 |
} |
|
166 |
catch (Exception e) { |
|
158 | 167 |
e.printStackTrace(); |
159 | 168 |
return false; |
160 | 169 |
} |
161 | 170 |
return true; |
162 | 171 |
} |
163 |
|
|
172 |
|
|
164 | 173 |
/** |
165 | 174 |
* Returns the annotation saved in the temporary database and in the CQP corpus indexes |
166 | 175 |
* |
... | ... | |
174 | 183 |
temporaryAnnotations = tempManager.getAnnotationsForMatches(matches, temporaryAnnotations, overlap); |
175 | 184 |
|
176 | 185 |
List<? extends Annotation> cqpAnnotations = cqpManager.getAnnotationsForMatches(matches, type, overlap); |
177 |
|
|
178 |
// System.out.println("Temporary annotations: "+temporaryAnnotations);
|
|
179 |
// System.out.println("CQP annotations: "+cqpAnnotations);
|
|
186 |
|
|
187 |
// System.out.println("Temporary annotations: "+temporaryAnnotations);
|
|
188 |
// System.out.println("CQP annotations: "+cqpAnnotations);
|
|
180 | 189 |
if (cqpAnnotations.size() != matches.size() || temporaryAnnotations.size() != matches.size()) { |
181 | 190 |
System.out.println("ERROR in getAnnotationsForMatches methods! "); |
182 | 191 |
return new ArrayList<Annotation>(matches.size()); |
183 | 192 |
} |
184 | 193 |
// merge the 2 results |
185 |
for (int i = 0 ; i < matches.size() ; i++) {
|
|
194 |
for (int i = 0; i < matches.size(); i++) {
|
|
186 | 195 |
if (cqpAnnotations.get(i) == null && temporaryAnnotations.get(i) == null) { |
187 | 196 |
resultAnnotations.add(null); |
188 |
} else if (temporaryAnnotations.get(i) != null) { |
|
197 |
} |
|
198 |
else if (temporaryAnnotations.get(i) != null) { |
|
189 | 199 |
resultAnnotations.add(temporaryAnnotations.get(i)); |
190 |
} else if (cqpAnnotations.get(i) != null) { |
|
200 |
} |
|
201 |
else if (cqpAnnotations.get(i) != null) { |
|
191 | 202 |
resultAnnotations.add(cqpAnnotations.get(i)); |
192 |
} else { |
|
203 |
} |
|
204 |
else { |
|
193 | 205 |
resultAnnotations.add(null); |
194 | 206 |
} |
195 | 207 |
} |
196 |
} catch (Exception e) { |
|
208 |
} |
|
209 |
catch (Exception e) { |
|
197 | 210 |
e.printStackTrace(); |
198 | 211 |
return new ArrayList<Annotation>(matches.size()); |
199 | 212 |
} |
200 | 213 |
return resultAnnotations; |
201 | 214 |
} |
202 |
|
|
215 |
|
|
203 | 216 |
public void clearInstance() { |
204 | 217 |
try { |
205 | 218 |
tempManager.close(); |
206 |
} catch(Exception e) { |
|
207 |
System.out.println("Fail to clear AnnotationManager instance: "+e); |
|
219 |
} |
|
220 |
catch (Exception e) { |
|
221 |
System.out.println("Fail to clear AnnotationManager instance: " + e); |
|
208 | 222 |
org.txm.utils.logger.Log.printStackTrace(e); |
209 | 223 |
} |
210 | 224 |
} |
211 |
|
|
225 |
|
|
212 | 226 |
public void checkData() { |
213 | 227 |
try { |
214 | 228 |
tempManager.checkData(); |
215 |
} catch (Exception e) { |
|
229 |
} |
|
230 |
catch (Exception e) { |
|
216 | 231 |
// TODO Auto-generated catch block |
217 | 232 |
e.printStackTrace(); |
218 | 233 |
} |
219 | 234 |
} |
220 |
|
|
235 |
|
|
221 | 236 |
public boolean hasChanges() { |
222 | 237 |
return tempManager.hasChanges(); |
223 | 238 |
} |
224 |
|
|
239 |
|
|
225 | 240 |
/** |
226 | 241 |
* |
227 | 242 |
* @param annotSelectedType not null |
... | ... | |
230 | 245 |
* @param job may be null |
231 | 246 |
* @return |
232 | 247 |
*/ |
233 |
public HashMap<Match,List<Annotation>> createAnnotations(AnnotationType annotSelectedType, |
|
248 |
public HashMap<Match, List<Annotation>> createAnnotations(AnnotationType annotSelectedType,
|
|
234 | 249 |
TypedValue annotSelectedTypedValue, List<? extends Match> matches, IProgressMonitor job) { |
235 |
|
|
236 |
HashMap<Match,List<Annotation>> allAnnotationsThatCollides = new HashMap<Match, List<Annotation>>(); |
|
250 |
|
|
251 |
HashMap<Match, List<Annotation>> allAnnotationsThatCollides = new HashMap<Match, List<Annotation>>();
|
|
237 | 252 |
tempManager.getEntityManager().getTransaction().begin(); // warning |
238 | 253 |
for (Match match : matches) { |
239 | 254 |
allAnnotationsThatCollides.put(match, new ArrayList<Annotation>()); |
... | ... | |
241 | 256 |
int start, end; |
242 | 257 |
if (match.getTarget() >= 0) { |
243 | 258 |
start = end = match.getTarget(); |
244 |
} else { |
|
259 |
} |
|
260 |
else { |
|
245 | 261 |
start = match.getStart(); |
246 | 262 |
end = match.getEnd(); |
247 | 263 |
} |
... | ... | |
256 | 272 |
if (annotSelectedType.getEffect().equals(AnnotationEffect.SEGMENT)) { |
257 | 273 |
cqpAnnotations = cqpManager.getAnnotations(null, match, null, true); // get all annotations |
258 | 274 |
// remove A) the wrapping annotations and B) the annotations with the same type and same positions |
259 |
for (int i = 0 ; i < cqpAnnotations.size() ; i++) {
|
|
275 |
for (int i = 0; i < cqpAnnotations.size(); i++) {
|
|
260 | 276 |
Annotation a = cqpAnnotations.get(i); |
261 |
|
|
277 |
|
|
262 | 278 |
// exact match + exact type |
263 | 279 |
if (a.getType().equals(annotSelectedType.getId()) && a.getStart() == start && a.getEnd() == end) { |
264 | 280 |
cqpAnnotations.remove(i); |
265 | 281 |
i--; |
266 |
} else if (!a.getType().equals(annotSelectedType.getId()) && (// different type and inner or outer wrap
|
|
267 |
(a.getStart() <= start && end <= a.getEnd()) ||
|
|
268 |
(start <= a.getStart() && a.getEnd() <= end)
|
|
269 |
)) { |
|
282 |
} |
|
283 |
else if (!a.getType().equals(annotSelectedType.getId()) && (// different type and inner or outer wrap
|
|
284 |
(a.getStart() <= start && end <= a.getEnd()) ||
|
|
285 |
(start <= a.getStart() && a.getEnd() <= end))) {
|
|
270 | 286 |
cqpAnnotations.remove(i); |
271 | 287 |
i--; |
272 | 288 |
} |
273 | 289 |
} |
274 |
} else { |
|
290 |
} |
|
291 |
else { |
|
275 | 292 |
// no need to test collision (AnnotationType=TOKEN) |
276 | 293 |
cqpAnnotations = new ArrayList<Annotation>(); |
277 | 294 |
} |
... | ... | |
279 | 296 |
if (cqpAnnotations.size() > 0) { |
280 | 297 |
allAnnotationsThatCollides.get(match).addAll(cqpAnnotations); |
281 | 298 |
continue; // don't create annotation, process next match |
282 |
} else { // test with temporary annotation manager |
|
299 |
} |
|
300 |
else { // test with temporary annotation manager |
|
283 | 301 |
List<Annotation> tempAnnotations = null; |
284 |
if (annotSelectedType.getEffect() == AnnotationEffect.TOKEN) { // only annotate the first word |
|
285 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, start, start); |
|
286 |
} else { |
|
287 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, start, end); |
|
288 |
} |
|
302 |
if (annotSelectedType.getEffect() == AnnotationEffect.TOKEN) { // only annotate the first word |
|
303 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, start, start); |
|
304 |
} |
|
305 |
else { |
|
306 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, start, end); |
|
307 |
} |
|
289 | 308 |
if (tempAnnotations.size() > 0) |
290 | 309 |
allAnnotationsThatCollides.get(match).addAll(tempAnnotations); |
291 | 310 |
} |
292 |
} catch (Exception e) { |
|
311 |
} |
|
312 |
catch (Exception e) { |
|
293 | 313 |
Log.printStackTrace(e); |
294 |
System.out.println("Error during annotation creation: "+e);
|
|
314 |
System.out.println("Error during annotation creation: " + e);
|
|
295 | 315 |
} |
296 | 316 |
|
297 | 317 |
if (allAnnotationsThatCollides.get(match).size() == 0) allAnnotationsThatCollides.remove(match); // keep only collision lists |
... | ... | |
300 | 320 |
tempManager.getEntityManager().getTransaction().commit(); // warning |
301 | 321 |
return allAnnotationsThatCollides; |
302 | 322 |
} |
303 |
|
|
323 |
|
|
304 | 324 |
public List<Annotation> getAnnotations(AnnotationType type, int start, int end, boolean overlap) { |
305 | 325 |
List<Annotation> temporaryAnnotations = null; |
306 | 326 |
List<Annotation> cqpAnnotations = null; |
... | ... | |
315 | 335 |
} |
316 | 336 |
temporaryAnnotations.add(i, a); |
317 | 337 |
} |
318 |
} catch(Exception e) { |
|
338 |
} |
|
339 |
catch (Exception e) { |
|
319 | 340 |
|
320 | 341 |
} |
321 | 342 |
return temporaryAnnotations; |
... | ... | |
324 | 345 |
public List<Annotation> getAnnotations(AnnotationType type, int i, int j) { |
325 | 346 |
return getAnnotations(type, i, j, false); |
326 | 347 |
} |
327 |
|
|
348 |
|
|
328 | 349 |
public void closeAll() { |
329 |
Log.fine("Closing annotation manager of "+corpus);
|
|
350 |
Log.fine("Closing annotation manager of " + corpus);
|
|
330 | 351 |
tempManager.close(); |
331 | 352 |
cqpManager.close(); |
332 | 353 |
} |
333 |
|
|
354 |
|
|
334 | 355 |
public boolean isOpen() { |
335 | 356 |
return tempManager.getEntityManager() != null && tempManager.getEntityManager().isOpen(); |
336 | 357 |
} |
337 |
|
|
338 |
|
|
358 |
|
|
359 |
|
|
339 | 360 |
public boolean initialize() throws Exception { |
340 | 361 |
tempManager = new TemporaryAnnotationManager(corpus); |
341 | 362 |
dirty = tempManager.getAnnotations().size() > 0; |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationPK.java (revision 2396) | ||
---|---|---|
6 | 6 |
|
7 | 7 |
@Embeddable |
8 | 8 |
public class AnnotationPK implements Serializable { |
9 |
|
|
9 |
|
|
10 | 10 |
private static final long serialVersionUID = -2360693333015275209L; |
11 |
|
|
12 |
//corresponding to the start and end positions (in the corpus) |
|
11 |
|
|
12 |
// corresponding to the start and end positions (in the corpus)
|
|
13 | 13 |
private int startpos; |
14 |
|
|
14 | 15 |
private int endpos; |
16 |
|
|
15 | 17 |
private String refType; |
16 |
|
|
17 |
public AnnotationPK() { |
|
18 |
} |
|
19 |
|
|
18 |
|
|
19 |
public AnnotationPK() {} |
|
20 |
|
|
20 | 21 |
public AnnotationPK(int start, int end, String refType) { |
21 | 22 |
this.startpos = start; |
22 | 23 |
this.endpos = end; |
23 | 24 |
this.refType = refType; |
24 | 25 |
} |
25 |
|
|
26 |
|
|
26 | 27 |
public void setRefType(String refType) { |
27 | 28 |
this.refType = refType; |
28 | 29 |
} |
29 |
|
|
30 |
|
|
30 | 31 |
public void setStartPosition(int start) { |
31 | 32 |
this.startpos = start; |
32 | 33 |
} |
33 |
|
|
34 |
|
|
34 | 35 |
public void setEndPosition(int end) { |
35 | 36 |
this.endpos = end; |
36 | 37 |
} |
37 |
|
|
38 |
|
|
38 | 39 |
public String getRefType() { |
39 | 40 |
return refType; |
40 | 41 |
} |
41 |
|
|
42 |
|
|
42 | 43 |
public int getStartPosition() { |
43 | 44 |
return startpos; |
44 | 45 |
} |
45 |
|
|
46 |
|
|
46 | 47 |
public int getEndPosition() { |
47 | 48 |
return endpos; |
48 | 49 |
} |
49 |
|
|
50 |
|
|
50 | 51 |
public boolean equals(Object obj) { |
51 | 52 |
if (obj == null) return false; |
52 | 53 |
if (obj == this) return true; |
53 | 54 |
if (!(obj instanceof AnnotationPK)) return false; |
54 |
|
|
55 |
|
|
55 | 56 |
AnnotationPK other = (AnnotationPK) obj; |
56 |
return startpos == other.startpos && endpos == other.endpos&& refType.equals(other.refType); |
|
57 |
return startpos == other.startpos && endpos == other.endpos && refType.equals(other.refType);
|
|
57 | 58 |
} |
58 |
|
|
59 |
|
|
59 | 60 |
public int hashCode() { |
60 |
return refType.hashCode()+startpos+endpos;
|
|
61 |
return refType.hashCode() + startpos + endpos;
|
|
61 | 62 |
} |
62 |
|
|
63 |
|
|
63 | 64 |
public String toString() { |
64 |
return getRefType() + "["+getStartPosition()+"-"+getEndPosition()+"]" ;
|
|
65 |
return getRefType() + "[" + getStartPosition() + "-" + getEndPosition() + "]";
|
|
65 | 66 |
} |
66 | 67 |
} |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/CorpusRuledConvertion.java (revision 2396) | ||
---|---|---|
16 | 16 |
import org.txm.utils.io.IOUtils; |
17 | 17 |
|
18 | 18 |
public class CorpusRuledConvertion { |
19 |
|
|
19 |
|
|
20 | 20 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
21 |
|
|
21 | 22 |
private String oldType; |
23 |
|
|
22 | 24 |
private String newType; |
23 |
|
|
25 |
|
|
24 | 26 |
public CorpusRuledConvertion(File conversionFile, String oldType, String newType) throws IOException { |
25 | 27 |
this.oldType = oldType; |
26 | 28 |
this.newType = newType; |
27 |
|
|
29 |
|
|
28 | 30 |
BufferedReader reader = IOUtils.getReader(conversionFile); |
29 | 31 |
String line = reader.readLine(); |
30 | 32 |
while (line != null) { |
31 | 33 |
int idx = line.indexOf("\t"); |
32 | 34 |
if (idx > 0) { |
33 | 35 |
String k = line.substring(0, idx); |
34 |
String v = line.substring(idx +1); |
|
36 |
String v = line.substring(idx + 1);
|
|
35 | 37 |
rules.put(Pattern.compile(k), v); |
36 | 38 |
} |
37 | 39 |
line = reader.readLine(); |
38 | 40 |
} |
39 |
|
|
40 |
System.out.println("Conversion rules: "+rules);
|
|
41 |
|
|
42 |
System.out.println("Conversion rules: " + rules);
|
|
41 | 43 |
} |
42 |
|
|
44 |
|
|
43 | 45 |
public CorpusRuledConvertion(LinkedHashMap<Pattern, String> rules, |
44 | 46 |
String oldType, String newType) { |
45 | 47 |
this.oldType = oldType; |
46 | 48 |
this.newType = newType; |
47 |
|
|
49 |
|
|
48 | 50 |
this.rules = rules; |
49 | 51 |
} |
50 |
|
|
52 |
|
|
51 | 53 |
public boolean process(MainCorpus corpus) throws XMLStreamException, IOException { |
52 | 54 |
File binaryCorpusDirectory = corpus.getProjectDirectory(); |
53 | 55 |
File txmDirectory = new File(binaryCorpusDirectory, "txm"); |
54 | 56 |
if (!txmDirectory.exists()) { |
55 |
System.out.println("'txm' directory not found in "+binaryCorpusDirectory.getAbsolutePath());
|
|
57 |
System.out.println("'txm' directory not found in " + binaryCorpusDirectory.getAbsolutePath());
|
|
56 | 58 |
return false; |
57 | 59 |
} |
58 | 60 |
File txmCorpusDirectory = new File(txmDirectory, corpus.getID()); |
59 | 61 |
if (!txmCorpusDirectory.exists()) { |
60 |
System.out.println("'"+corpus.getName()+"' corpus directory not found in "+txmDirectory.getAbsolutePath());
|
|
62 |
System.out.println("'" + corpus.getName() + "' corpus directory not found in " + txmDirectory.getAbsolutePath());
|
|
61 | 63 |
return false; |
62 | 64 |
} |
63 | 65 |
File[] files = txmCorpusDirectory.listFiles(IOUtils.HIDDENFILE_FILTER); |
64 | 66 |
if (files == null || files.length == 0) { |
65 |
System.out.println("No file in "+txmCorpusDirectory);
|
|
67 |
System.out.println("No file in " + txmCorpusDirectory);
|
|
66 | 68 |
return false; |
67 | 69 |
} |
68 | 70 |
ConsoleProgressBar cpb = new ConsoleProgressBar(files.length); |
... | ... | |
71 | 73 |
if (xmlFile.isDirectory()) continue; |
72 | 74 |
if (xmlFile.isHidden()) continue; |
73 | 75 |
if (!xmlFile.getName().endsWith(".xml")) continue; |
74 |
|
|
75 |
File tmpFile = new File(xmlFile.getParentFile(), "tmp_"+xmlFile.getName());
|
|
76 |
|
|
77 |
File tmpFile = new File(xmlFile.getParentFile(), "tmp_" + xmlFile.getName());
|
|
76 | 78 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, XMLTXMFileRuledConversion.ABANDON); |
77 | 79 |
if (converter.process(tmpFile)) { |
78 | 80 |
xmlFile.delete(); |
79 |
// try {
|
|
80 |
// FileCopy.copy(tmpFile, new File("/tmp/"+tmpFile.getName()));
|
|
81 |
// } catch (IOException e) {
|
|
82 |
// // TODO Auto-generated catch block
|
|
83 |
// e.printStackTrace();
|
|
84 |
// }
|
|
85 |
|
|
81 |
// try {
|
|
82 |
// FileCopy.copy(tmpFile, new File("/tmp/"+tmpFile.getName()));
|
|
83 |
// } catch (IOException e) {
|
|
84 |
// // TODO Auto-generated catch block
|
|
85 |
// e.printStackTrace();
|
|
86 |
// }
|
|
87 |
|
|
86 | 88 |
HashSet<String> errors = converter.getNoMatchValues(); |
87 | 89 |
if (errors.size() > 0) { |
88 | 90 |
System.out.println("Some values did not match rule:"); |
89 | 91 |
int i = 0; |
90 | 92 |
for (String error : errors) { |
91 |
System.out.println("\t"+error);
|
|
93 |
System.out.println("\t" + error);
|
|
92 | 94 |
if (i >= 10) break; |
93 | 95 |
} |
94 | 96 |
if (errors.size() > 10) { |
95 | 97 |
try { |
96 | 98 |
File errorFile = new File(Toolbox.getTxmHomePath(), "errors.txt"); |
97 | 99 |
IOUtils.write(errorFile, StringUtils.join(errors, "\t")); |
98 |
System.out.println("More errors, see "+errorFile.getAbsolutePath()); |
|
99 |
} catch (Exception e) { |
|
100 |
System.out.println("More errors, see " + errorFile.getAbsolutePath()); |
|
101 |
} |
|
102 |
catch (Exception e) { |
|
100 | 103 |
e.printStackTrace(); |
101 | 104 |
} |
102 | 105 |
} |
103 | 106 |
return false; |
104 | 107 |
} |
105 |
|
|
108 |
|
|
106 | 109 |
tmpFile.renameTo(xmlFile); |
107 | 110 |
if (tmpFile.exists()) { |
108 |
System.out.println("Could not replace original file with the result file. "+xmlFile+ " with "+tmpFile);
|
|
111 |
System.out.println("Could not replace original file with the result file. " + xmlFile + " with " + tmpFile);
|
|
109 | 112 |
return false; |
110 | 113 |
} |
111 |
} else { |
|
112 |
System.out.println("Fail to process "+xmlFile); |
|
114 |
} |
|
115 |
else { |
|
116 |
System.out.println("Fail to process " + xmlFile); |
|
113 | 117 |
return false; |
114 | 118 |
} |
115 | 119 |
} |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/XMLTXMFileRuledConversion.java (revision 2396) | ||
---|---|---|
11 | 11 |
import org.txm.importer.StaxIdentityParser; |
12 | 12 |
|
13 | 13 |
public class XMLTXMFileRuledConversion extends StaxIdentityParser { |
14 |
|
|
14 |
|
|
15 | 15 |
protected LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
16 |
|
|
16 | 17 |
protected String oldType; |
18 |
|
|
17 | 19 |
protected String newType; |
18 |
|
|
20 |
|
|
19 | 21 |
public static final String DELETE = "supprimer"; |
22 |
|
|
20 | 23 |
public static final String COPY = "copier"; |
24 |
|
|
21 | 25 |
public static final String ABANDON = "abandon"; |
26 |
|
|
22 | 27 |
HashSet<String> noMatchValues = new HashSet<String>(); |
23 | 28 |
|
24 | 29 |
public XMLTXMFileRuledConversion(File infile, LinkedHashMap<Pattern, String> rules, String oldType, String newType, String mode) throws IOException, XMLStreamException { |
... | ... | |
29 | 34 |
|
30 | 35 |
this.mode = mode; |
31 | 36 |
|
32 |
if (!this.newType.startsWith("#")) this.newType = "#"+this.newType;
|
|
33 |
if (!this.oldType.startsWith("#")) this.oldType = "#"+this.oldType;
|
|
37 |
if (!this.newType.startsWith("#")) this.newType = "#" + this.newType;
|
|
38 |
if (!this.oldType.startsWith("#")) this.oldType = "#" + this.oldType;
|
|
34 | 39 |
} |
35 |
|
|
40 |
|
|
36 | 41 |
boolean inW = false, inAna = false, inForm; |
42 |
|
|
37 | 43 |
LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>(); |
44 |
|
|
38 | 45 |
LinkedHashMap<String, String> anaResps = new LinkedHashMap<String, String>(); |
46 |
|
|
39 | 47 |
String typeName = null; |
48 |
|
|
40 | 49 |
String respName = null; |
50 |
|
|
41 | 51 |
String formValue, typeValue = null; |
52 |
|
|
42 | 53 |
private Object mode; |
43 |
|
|
54 |
|
|
44 | 55 |
@Override |
45 | 56 |
public void processStartElement() throws XMLStreamException, IOException { |
46 | 57 |
if (!inW) super.processStartElement(); // don't write W content |
47 |
|
|
58 |
|
|
48 | 59 |
if (localname.equals("w")) { |
49 | 60 |
inW = true; |
50 | 61 |
anaValues.clear(); |
51 | 62 |
anaResps.clear(); |
52 |
|
|
53 |
//initialize the new type to a empty value in case there is transformation rule |
|
63 |
|
|
64 |
// initialize the new type to a empty value in case there is transformation rule
|
|
54 | 65 |
anaValues.put(newType, ""); |
55 | 66 |
anaResps.put(newType, "#txm_recode"); |
56 |
} else if (localname.equals("ana")) { |
|
67 |
} |
|
68 |
else if (localname.equals("ana")) { |
|
57 | 69 |
inAna = true; |
58 | 70 |
typeName = parser.getAttributeValue(null, "type"); |
59 | 71 |
respName = parser.getAttributeValue(null, "resp"); |
60 | 72 |
anaResps.put(typeName, respName); |
61 |
//if (typeName != null) typeName = typeName.substring(1); // remove # |
|
73 |
// if (typeName != null) typeName = typeName.substring(1); // remove #
|
|
62 | 74 |
typeValue = ""; |
63 |
} else if (localname.equals("form")) { |
|
75 |
} |
|
76 |
else if (localname.equals("form")) { |
|
64 | 77 |
inForm = true; |
65 | 78 |
formValue = ""; |
66 |
}
|
|
79 |
} |
|
67 | 80 |
} |
68 |
|
|
81 |
|
|
69 | 82 |
@Override |
70 | 83 |
public void processCharacters() throws XMLStreamException { |
71 |
if (inW && inAna) typeValue+=parser.getText();
|
|
72 |
else if (inW && inForm) formValue+=parser.getText();
|
|
84 |
if (inW && inAna) typeValue += parser.getText();
|
|
85 |
else if (inW && inForm) formValue += parser.getText();
|
|
73 | 86 |
else super.processCharacters(); |
74 | 87 |
} |
75 |
|
|
88 |
|
|
76 | 89 |
@Override |
77 | 90 |
public void processEndElement() throws XMLStreamException { |
78 | 91 |
if (localname.equals("w")) { |
79 | 92 |
inW = false; |
80 |
|
|
93 |
|
|
81 | 94 |
// write W content |
82 | 95 |
try { |
83 | 96 |
// get the value to test |
84 | 97 |
String value = null; |
85 | 98 |
if (oldType.equals("word")) { |
86 | 99 |
value = formValue; |
87 |
} else { |
|
100 |
} |
|
101 |
else { |
|
88 | 102 |
value = anaValues.get(oldType); |
89 | 103 |
} |
90 | 104 |
|
91 | 105 |
if (newType.equals("word")) { // update form property |
92 | 106 |
updateFormValueIfMatch(value); |
93 |
} else { // update another word property |
|
107 |
} |
|
108 |
else { // update another word property |
|
94 | 109 |
if (value != null) { |
95 | 110 |
updateAnaValuesIfMatch(value); |
96 | 111 |
} |
... | ... | |
100 | 115 |
writer.writeStartElement("txm:form"); |
101 | 116 |
writer.writeCharacters(formValue); |
102 | 117 |
writer.writeEndElement(); |
103 |
|
|
118 |
|
|
104 | 119 |
for (String k : anaValues.keySet()) { |
105 | 120 |
String resp = anaResps.get(k); |
106 | 121 |
if (resp == null) resp = "#txm_recode"; |
107 |
|
|
122 |
|
|
108 | 123 |
writer.writeStartElement("txm:ana"); |
109 | 124 |
writer.writeAttribute("resp", resp); |
110 | 125 |
writer.writeAttribute("type", k); |
111 | 126 |
writer.writeCharacters(anaValues.get(k)); |
112 | 127 |
writer.writeEndElement(); |
113 | 128 |
} |
114 |
} catch (XMLStreamException e) { |
|
129 |
} |
|
130 |
catch (XMLStreamException e) { |
|
115 | 131 |
e.printStackTrace(); |
116 | 132 |
} |
117 |
} else if (localname.equals("ana")) { |
|
133 |
} |
|
134 |
else if (localname.equals("ana")) { |
|
118 | 135 |
anaValues.put(typeName, typeValue); |
119 | 136 |
inAna = false; |
120 |
} else if (localname.equals("form")) { |
|
137 |
} |
|
138 |
else if (localname.equals("form")) { |
|
121 | 139 |
inForm = false; |
122 |
}
|
|
123 |
|
|
140 |
} |
|
141 |
|
|
124 | 142 |
if (!inW) super.processEndElement(); // don't write W content |
125 | 143 |
} |
126 |
|
|
144 |
|
|
127 | 145 |
protected void updateFormValueIfMatch(String value) { |
128 | 146 |
for (Pattern rule : rules.keySet()) { |
129 | 147 |
if (rule.matcher(value).matches()) { |
... | ... | |
131 | 149 |
return; // ok stop |
132 | 150 |
} |
133 | 151 |
} |
134 |
|
|
152 |
|
|
135 | 153 |
noMatchValues.add(value); |
136 | 154 |
} |
137 |
|
|
155 |
|
|
138 | 156 |
protected void updateAnaValuesIfMatch(String value) { |
139 | 157 |
for (Pattern rule : rules.keySet()) { |
140 | 158 |
if (rule.matcher(value).matches()) { |
... | ... | |
145 | 163 |
} |
146 | 164 |
} |
147 | 165 |
if (DELETE.equals(mode)) anaValues.put(newType, ""); // do a replace if newType == oldType :-) |
148 |
else if (ABANDON.equals(mode)) anaValues.put(newType, "ERROR("+value+")"); // do a replace if newType == oldType :-)
|
|
166 |
else if (ABANDON.equals(mode)) anaValues.put(newType, "ERROR(" + value + ")"); // do a replace if newType == oldType :-)
|
|
149 | 167 |
|
150 | 168 |
noMatchValues.add(value); |
151 | 169 |
} |
... | ... | |
153 | 171 |
public HashSet<String> getNoMatchValues() { |
154 | 172 |
return noMatchValues; |
155 | 173 |
} |
156 |
|
|
174 |
|
|
157 | 175 |
public static void main(String args[]) { |
158 | 176 |
try { |
159 |
File xmlFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test.xml"); |
|
160 |
File tmpFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test-o.xml"); |
|
161 |
String oldType = "type"; |
|
162 |
String newType = "type"; |
|
163 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
164 |
rules.put(Pattern.compile("w"), "WORD"); |
|
165 |
rules.put(Pattern.compile("x.+"), "XWORD"); |
|
166 |
rules.put(Pattern.compile("y"), "YWORD"); |
|
167 |
rules.put(Pattern.compile("y.*"), "YMULTIWORD"); |
|
168 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, ABANDON); |
|
169 |
System.out.println(converter.process(tmpFile)); |
|
170 |
} catch (Exception e) { |
|
177 |
File xmlFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test.xml"); |
|
178 |
File tmpFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test-o.xml"); |
|
179 |
String oldType = "type"; |
|
180 |
String newType = "type"; |
|
181 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
182 |
rules.put(Pattern.compile("w"), "WORD"); |
|
183 |
rules.put(Pattern.compile("x.+"), "XWORD"); |
|
184 |
rules.put(Pattern.compile("y"), "YWORD"); |
|
185 |
rules.put(Pattern.compile("y.*"), "YMULTIWORD"); |
|
186 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, ABANDON); |
|
187 |
System.out.println(converter.process(tmpFile)); |
|
188 |
} |
|
189 |
catch (Exception e) { |
|
171 | 190 |
e.printStackTrace(); |
172 | 191 |
} |
173 | 192 |
} |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationWriter.java (revision 2396) | ||
---|---|---|
33 | 33 |
|
34 | 34 |
|
35 | 35 |
public class AnnotationWriter { |
36 |
|
|
36 |
|
|
37 | 37 |
MainCorpus corpus; |
38 |
|
|
38 | 39 |
private List<AnnotationType> types; |
40 |
|
|
39 | 41 |
private KnowledgeRepository defaultKR; |
40 |
|
|
41 |
public AnnotationWriter(MainCorpus corpus) throws BackingStoreException{ |
|
42 |
|
|
43 |
public AnnotationWriter(MainCorpus corpus) throws BackingStoreException {
|
|
42 | 44 |
this.corpus = corpus; |
43 | 45 |
List<String> krnames = KRAnnotationEngine.getKnowledgeRepositoryNames(corpus); |
44 | 46 |
if (krnames.size() == 0) { |
45 | 47 |
Log.severe(NLS.bind("** Error: no knowledge repository found in {0} corpus.", corpus)); |
46 |
throw new IllegalArgumentException("No kr in "+corpus);
|
|
48 |
throw new IllegalArgumentException("No kr in " + corpus);
|
|
47 | 49 |
} |
48 | 50 |
String t = krnames.get(0); |
49 | 51 |
defaultKR = KRAnnotationEngine.getKnowledgeRepository(corpus, t); |
50 | 52 |
if (defaultKR == null) { |
51 | 53 |
Log.severe(NLS.bind("** Error: no knowledge repository {0} found in {0} corpus.", defaultKR, corpus)); |
52 |
throw new IllegalArgumentException("No kr "+defaultKR+" in "+corpus);
|
|
54 |
throw new IllegalArgumentException("No kr " + defaultKR + " in " + corpus);
|
|
53 | 55 |
} |
54 | 56 |
types = defaultKR.getAllAnnotationTypes(); |
55 | 57 |
|
56 | 58 |
} |
57 |
|
|
59 |
|
|
58 | 60 |
/** |
59 | 61 |
* process a text to build standoff files |
60 | 62 |
* |
... | ... | |
67 | 69 |
* @throws CqiServerError |
68 | 70 |
* @throws CqiClientException |
69 | 71 |
* @throws InvalidCqpIdException |
70 |
* @throws XMLStreamException
|
|
72 |
* @throws XMLStreamException |
|
71 | 73 |
*/ |
72 |
protected boolean writeTextAnnotationToSyMoGIH(String textid, File currentXMLFile, File currentXMLStandoffFile, File xmlStandOffDirectory) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{ |
|
73 |
System.out.println(" text="+textid); |
|
74 |
protected boolean writeTextAnnotationToSyMoGIH(String textid, File currentXMLFile, File currentXMLStandoffFile, File xmlStandOffDirectory) throws IOException, CqiServerError, CqiClientException, |
|
75 |
InvalidCqpIdException, XMLStreamException { |
|
76 |
System.out.println(" text=" + textid); |
|
74 | 77 |
boolean show_debug = Log.getLevel().intValue() < Level.INFO.intValue(); |
75 | 78 |
|
76 | 79 |
AnnotationSyMoGIHWriter annotationstdoff = new AnnotationSyMoGIHWriter(textid, currentXMLFile, xmlStandOffDirectory, types, show_debug); |
77 | 80 |
|
78 |
///rather test on the new xml standoff files |
|
81 |
/// rather test on the new xml standoff files
|
|
79 | 82 |
if (annotationstdoff.process(currentXMLStandoffFile)) { |
80 |
if (ValidateXml.test(currentXMLStandoffFile)) { //TODO ALSO check if annotations are well-written
|
|
83 |
if (ValidateXml.test(currentXMLStandoffFile)) { // TODO ALSO check if annotations are well-written
|
|
81 | 84 |
return true; |
82 |
} else { |
|
83 |
System.out.println("Error: result file "+currentXMLStandoffFile+" is malformed."); |
|
84 | 85 |
} |
85 |
} else { |
|
86 |
System.out.println("Error: while processing "+currentXMLStandoffFile+" in standoff dir"); |
|
86 |
else { |
|
87 |
System.out.println("Error: result file " + currentXMLStandoffFile + " is malformed."); |
|
88 |
} |
|
87 | 89 |
} |
90 |
else { |
|
91 |
System.out.println("Error: while processing " + currentXMLStandoffFile + " in standoff dir"); |
|
92 |
} |
|
88 | 93 |
return false; |
89 | 94 |
|
90 | 95 |
} |
... | ... | |
98 | 103 |
* @throws CqiServerError |
99 | 104 |
* @throws CqiClientException |
100 | 105 |
* @throws InvalidCqpIdException |
101 |
* @throws XMLStreamException
|
|
106 |
* @throws XMLStreamException |
|
102 | 107 |
*/ |
103 |
public boolean writeAnnotationsInStandoff(File resultZipFile) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{ |
|
108 |
public boolean writeAnnotationsInStandoff(File resultZipFile) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException {
|
|
104 | 109 |
|
105 | 110 |
List<String> textsIds = Arrays.asList(corpus.getCorpusTextIdsList()); |
106 |
System.out.println("Exporting annotations of "+StringUtils.join(textsIds, ", ")+".");
|
|
107 |
|
|
108 |
File resultDirectory = new File(Toolbox.getTxmHomePath(), "results/"+corpus.getID()+"_annotations");
|
|
111 |
System.out.println("Exporting annotations of " + StringUtils.join(textsIds, ", ") + ".");
|
|
112 |
|
|
113 |
File resultDirectory = new File(Toolbox.getTxmHomePath(), "results/" + corpus.getID() + "_annotations");
|
|
109 | 114 |
DeleteDir.deleteDirectory(resultDirectory); |
110 | 115 |
resultDirectory.mkdirs(); |
111 | 116 |
if (!(resultDirectory.exists() && resultDirectory.canWrite())) { |
112 |
System.out.println("ERROR: could not create/write temporary directory: "+resultDirectory);
|
|
117 |
System.out.println("ERROR: could not create/write temporary directory: " + resultDirectory);
|
|
113 | 118 |
return false; |
114 | 119 |
} |
115 | 120 |
|
116 | 121 |
File inputDirectory = corpus.getProjectDirectory(); |
117 |
File corpusTxmDirectory = new File(inputDirectory, "txm/"+corpus.getID());
|
|
122 |
File corpusTxmDirectory = new File(inputDirectory, "txm/" + corpus.getID());
|
|
118 | 123 |
|
119 |
System.out.println("Writing annotations XML files in "+resultDirectory);
|
|
120 |
for (String textid : textsIds) {
|
|
121 |
File currentXMLFile = new File(corpusTxmDirectory, textid+".xml");
|
|
124 |
System.out.println("Writing annotations XML files in " + resultDirectory);
|
|
125 |
for (String textid : textsIds) { |
|
126 |
File currentXMLFile = new File(corpusTxmDirectory, textid + ".xml");
|
|
122 | 127 |
if (!currentXMLFile.exists()) { |
123 |
System.out.println("Error: cannot found XML file for text with id="+textid);
|
|
128 |
System.out.println("Error: cannot found XML file for text with id=" + textid);
|
|
124 | 129 |
return false; |
125 | 130 |
} |
126 |
File currentXMLStandoffFile = new File(resultDirectory, textid+".xml"); //To Be Changed ?
|
|
127 |
if (!writeTextAnnotationToSyMoGIH(textid, currentXMLFile, currentXMLStandoffFile, resultDirectory )) {
|
|
128 |
System.out.println("Error while writing annotations of text "+currentXMLStandoffFile);
|
|
129 |
return false;
|
|
131 |
File currentXMLStandoffFile = new File(resultDirectory, textid + ".xml"); // To Be Changed ?
|
|
132 |
if (!writeTextAnnotationToSyMoGIH(textid, currentXMLFile, currentXMLStandoffFile, resultDirectory)) { |
|
133 |
System.out.println("Error while writing annotations of text " + currentXMLStandoffFile);
|
|
134 |
return false; |
|
130 | 135 |
} |
131 | 136 |
} |
132 | 137 |
|
133 | 138 |
Zip.compress(resultDirectory, resultZipFile, new ConsoleProgressBar(1)); |
134 | 139 |
DeleteDir.deleteDirectory(resultDirectory); |
135 | 140 |
|
136 |
System.out.println("Annotations saved in "+resultZipFile.getAbsolutePath());
|
|
141 |
System.out.println("Annotations saved in " + resultZipFile.getAbsolutePath());
|
|
137 | 142 |
return resultZipFile.exists(); |
138 | 143 |
} |
139 | 144 |
|
140 | 145 |
/** |
141 | 146 |
* |
142 | 147 |
* @param allCorpusAnnotations ordered annotations |
143 |
* @param monitor
|
|
148 |
* @param monitor |
|
144 | 149 |
* @return |
145 | 150 |
* @throws IOException |
146 | 151 |
* @throws CqiServerError |
147 | 152 |
* @throws CqiClientException |
148 | 153 |
* @throws InvalidCqpIdException |
149 |
* @throws XMLStreamException
|
|
150 |
* @throws BackingStoreException
|
|
154 |
* @throws XMLStreamException |
|
155 |
* @throws BackingStoreException |
|
151 | 156 |
*/ |
152 |
public boolean writeAnnotations(List<Annotation> allCorpusAnnotations, IProgressMonitor monitor) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException, BackingStoreException{ |
|
153 |
//MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(c.getName()); |
|
157 |
public boolean writeAnnotations(List<Annotation> allCorpusAnnotations, IProgressMonitor monitor) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException, |
|
158 |
BackingStoreException { |
|
159 |
// MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(c.getName()); |
|
154 | 160 |
Log.info(NLS.bind(KRAnnotationCoreMessages.savingP0Annotations, allCorpusAnnotations.size())); |
155 |
|
|
161 |
|
|
156 | 162 |
int[] end_limits = corpus.getTextEndLimits(); |
157 | 163 |
int[] start_limits = corpus.getTextStartLimits(); |
158 | 164 |
List<String> textsIds = Arrays.asList(corpus.getCorpusTextIdsList()); |
159 |
|
|
165 |
|
|
160 | 166 |
File inputDirectory = corpus.getProjectDirectory(); |
161 |
File txmDirectory = new File(inputDirectory, "txm/"+corpus.getID());
|
|
162 |
|
|
167 |
File txmDirectory = new File(inputDirectory, "txm/" + corpus.getID());
|
|
168 |
|
|
163 | 169 |
ArrayList<Annotation> textAnnotations = new ArrayList<Annotation>(); |
164 | 170 |
HashMap<String, ArrayList<Annotation>> annotationsPerTexts = new HashMap<String, ArrayList<Annotation>>(); |
165 |
|
|
171 |
|
|
166 | 172 |
int currentText = 0; |
167 |
File currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
|
|
168 |
|
|
169 |
//store first text |
|
170 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
|
|
173 |
File currentXMLFile = new File(txmDirectory, textsIds.get(currentText) + ".xml");
|
|
174 |
|
|
175 |
// store first text
|
|
176 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText) + ".xml");
|
|
171 | 177 |
textAnnotations = new ArrayList<Annotation>(); |
172 | 178 |
annotationsPerTexts.put(textsIds.get(currentText), textAnnotations); |
173 |
|
|
179 |
|
|
174 | 180 |
// group annotations per text |
175 | 181 |
for (Annotation currentAnnot : allCorpusAnnotations) { // parse all annotations |
176 |
//System.out.println(" Annotation: "+currentAnnot); |
|
182 |
// System.out.println(" Annotation: "+currentAnnot);
|
|
177 | 183 |
int pos = currentAnnot.getPK().getEndPosition(); |
178 |
|
|
179 |
while (pos > end_limits[currentText]) { // while pos is not in the currentText.end
|
|
184 |
|
|
185 |
while (pos > end_limits[currentText]) { // while pos is not in the currentText.end |
|
180 | 186 |
currentText++; |
181 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
|
|
187 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText) + ".xml");
|
|
182 | 188 |
textAnnotations = new ArrayList<Annotation>(); |
183 | 189 |
annotationsPerTexts.put(textsIds.get(currentText), textAnnotations); |
184 | 190 |
} |
185 |
|
|
191 |
|
|
186 | 192 |
textAnnotations.add(currentAnnot); |
187 | 193 |
} |
188 |
|
|
189 |
File tmpXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_tmp");
|
|
194 |
|
|
195 |
File tmpXMLTXMDirectory = new File(txmDirectory.getAbsolutePath() + "_tmp");
|
|
190 | 196 |
DeleteDir.deleteDirectory(tmpXMLTXMDirectory); |
191 | 197 |
tmpXMLTXMDirectory.mkdirs(); |
192 | 198 |
if (!(tmpXMLTXMDirectory.exists() && tmpXMLTXMDirectory.canWrite())) { |
193 |
System.out.println("ERROR: could not create directory: "+tmpXMLTXMDirectory);
|
|
199 |
System.out.println("ERROR: could not create directory: " + tmpXMLTXMDirectory);
|
|
194 | 200 |
return false; |
195 | 201 |
} |
196 |
|
|
197 |
File previousXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_previous");
|
|
198 |
//DeleteDir.deleteDirectory(tmpXMLTXMDirectory); |
|
202 |
|
|
203 |
File previousXMLTXMDirectory = new File(txmDirectory.getAbsolutePath() + "_previous");
|
|
204 |
// DeleteDir.deleteDirectory(tmpXMLTXMDirectory);
|
|
199 | 205 |
previousXMLTXMDirectory.mkdirs(); |
200 |
if (!(previousXMLTXMDirectory.exists() && previousXMLTXMDirectory.canWrite())) {
|
|
201 |
System.out.println("ERROR: could not create directory: "+previousXMLTXMDirectory);
|
|
206 |
if (!(previousXMLTXMDirectory.exists() && previousXMLTXMDirectory.canWrite())) { |
|
207 |
System.out.println("ERROR: could not create directory: " + previousXMLTXMDirectory);
|
|
202 | 208 |
return false; |
203 | 209 |
} |
204 |
|
|
205 |
Log.fine("Annotations grouped per text for "+annotationsPerTexts.size()+" text"+(annotationsPerTexts.size() > 0?"s":""));
|
|
206 |
Log.fine(" - Writing temporary XML files in: "+tmpXMLTXMDirectory);
|
|
207 |
Log.fine(" - Copying previous version of XML files in: "+previousXMLTXMDirectory);
|
|
210 |
|
|
211 |
Log.fine("Annotations grouped per text for " + annotationsPerTexts.size() + " text" + (annotationsPerTexts.size() > 0 ? "s" : ""));
|
|
212 |
Log.fine(" - Writing temporary XML files in: " + tmpXMLTXMDirectory);
|
|
213 |
Log.fine(" - Copying previous version of XML files in: " + previousXMLTXMDirectory);
|
|
208 | 214 |
// for all annotation PER TEXT, update the XML-TXM files |
209 | 215 |
currentText = 0; |
210 |
while (currentText < end_limits.length ) { // end limits : 10, 30, 45, 55, 103
|
|
211 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
|
|
216 |
while (currentText < end_limits.length) { // end limits : 10, 30, 45, 55, 103 |
|
217 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText) + ".xml");
|
|
212 | 218 |
ArrayList<Annotation> allAnnotations = annotationsPerTexts.get(textsIds.get(currentText)); |
213 | 219 |
if (allAnnotations != null && allAnnotations.size() > 0) { |
214 | 220 |
ArrayList<Annotation> allSegmentAnnotations = new ArrayList<Annotation>(); |
215 | 221 |
ArrayList<Annotation> allTokenAnnotations = new ArrayList<Annotation>(); |
216 |
|
|
217 |
//System.out.println("Using KR="+defaultKR); |
|
222 |
|
|
223 |
// System.out.println("Using KR="+defaultKR);
|
|
218 | 224 |
for (Annotation a : allAnnotations) { |
219 | 225 |
AnnotationType type = defaultKR.getType(a.getType()); |
220 | 226 |
if (type != null) { |
221 | 227 |
if (AnnotationEffect.SEGMENT.equals(type.getEffect())) { |
222 | 228 |
allSegmentAnnotations.add(a); |
223 |
} else if (AnnotationEffect.TOKEN.equals(type.getEffect())) { |
|
229 |
} |
|
230 |
else if (AnnotationEffect.TOKEN.equals(type.getEffect())) { |
|
224 | 231 |
allTokenAnnotations.add(a); |
225 |
} else { |
|
226 |
Log.fine("Annotation "+a+" with type="+a.getType()+" not found in default KR="+defaultKR); |
|
227 | 232 |
} |
228 |
} else { |
|
229 |
Log.warning("Warning: unknowed type: "+a.getType()); |
|
233 |
else { |
|
234 |
Log.fine("Annotation " + a + " with type=" + a.getType() + " not found in default KR=" + defaultKR); |
|
235 |
} |
|
230 | 236 |
} |
237 |
else { |
|
238 |
Log.warning("Warning: unknowed type: " + a.getType()); |
|
239 |
} |
|
231 | 240 |
} |
232 |
|
|
233 |
if (!writeAnnotationsInFile(currentXMLFile, start_limits[currentText],
|
|
234 |
allSegmentAnnotations, allTokenAnnotations,
|
|
241 |
|
|
242 |
if (!writeAnnotationsInFile(currentXMLFile, start_limits[currentText], |
|
243 |
allSegmentAnnotations, allTokenAnnotations, |
|
235 | 244 |
tmpXMLTXMDirectory, previousXMLTXMDirectory)) { |
236 |
Log.severe("Error while writing annotations of text "+currentXMLFile);
|
|
245 |
Log.severe("Error while writing annotations of text " + currentXMLFile);
|
|
237 | 246 |
return false; |
238 |
} else { |
|
247 |
} |
|
248 |
else { |
|
239 | 249 |
if (monitor != null) { |
240 |
monitor.worked(allSegmentAnnotations.size()+allTokenAnnotations.size());
|
|
250 |
monitor.worked(allSegmentAnnotations.size() + allTokenAnnotations.size());
|
|
241 | 251 |
} |
242 | 252 |
} |
243 | 253 |
} |
244 | 254 |
currentText++; |
245 | 255 |
} |
246 |
|
|
256 |
|
|
247 | 257 |
return true; |
248 |
} |
|
249 |
|
|
250 |
//writeAnnotationInStandoffFile(currentXMLStandoffFile, allAnnotations, annotator, tmpXMLTXMDirectory, previousXMLTXMDirectory)) |
|
258 |
} |
|
251 | 259 |
|
252 |
protected boolean writeAnnotationsInFile(File xmlFile, int text_start_position, |
|
253 |
ArrayList<Annotation> segmentAnnotations, ArrayList<Annotation> tokenAnnotations, File tmpXMLTXMDirectory, File previousXMLTXMDirectory) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException, XMLStreamException{ |
|
254 |
Log.fine("Writing annotations for text "+xmlFile+" segment annotations="+segmentAnnotations.size()+" token annotations="+tokenAnnotations.size()); |
|
255 |
//System.out.println(segmentAnnotations); |
|
256 |
//System.out.println(tokenAnnotations); |
|
257 |
|
|
260 |
// writeAnnotationInStandoffFile(currentXMLStandoffFile, allAnnotations, annotator, tmpXMLTXMDirectory, previousXMLTXMDirectory)) |
|
261 |
|
|
262 |
protected boolean writeAnnotationsInFile(File xmlFile, int text_start_position, |
|
263 |
ArrayList<Annotation> segmentAnnotations, ArrayList<Annotation> tokenAnnotations, File tmpXMLTXMDirectory, File previousXMLTXMDirectory) throws CqiClientException, IOException, |
|
264 |
CqiServerError, InvalidCqpIdException, XMLStreamException { |
|
265 |
Log.fine("Writing annotations for text " + xmlFile + " segment annotations=" + segmentAnnotations.size() + " token annotations=" + tokenAnnotations.size()); |
|
266 |
// System.out.println(segmentAnnotations); |
|
267 |
// System.out.println(tokenAnnotations); |
|
268 |
|
|
258 | 269 |
boolean show_debug = Log.getLevel().intValue() < Level.INFO.intValue(); |
259 | 270 |
AnnotationInjector annotationInjector = new AnnotationInjector(xmlFile, segmentAnnotations, tokenAnnotations, text_start_position, show_debug); |
260 |
|
|
271 |
|
|
261 | 272 |
File tmpfile = new File(tmpXMLTXMDirectory, xmlFile.getName()); |
262 | 273 |
File previousfile = new File(previousXMLTXMDirectory, xmlFile.getName()); |
263 |
|
|
274 |
|
|
264 | 275 |
if (annotationInjector.process(tmpfile)) { |
265 |
if (ValidateXml.test(tmpfile)) { //TODO ALSO check if annotations are well-written
|
|
276 |
if (ValidateXml.test(tmpfile)) { // TODO ALSO check if annotations are well-written
|
|
266 | 277 |
previousfile.delete(); // in case there is one |
267 | 278 |
if (!previousfile.exists() && xmlFile.renameTo(previousfile)) { |
268 | 279 |
tmpfile.renameTo(xmlFile); |
269 | 280 |
return true; |
270 |
} else { |
|
271 |
Log.severe("Error: could not replace XML-TXM file: "+xmlFile+" with "+tmpfile); |
|
272 | 281 |
} |
273 |
} else { |
|
274 |
Log.severe("Error: result file "+tmpfile+" is malformed."); |
|
282 |
else { |
|
283 |
Log.severe("Error: could not replace XML-TXM file: " + xmlFile + " with " + tmpfile); |
|
284 |
} |
|
275 | 285 |
} |
276 |
} else { |
|
277 |
Log.severe("Error: while processing "+xmlFile+" in temp dir"); |
Formats disponibles : Unified diff