Révision 2396
| tmp/org.txm.ca.rcp/.classpath (revision 2396) | ||
|---|---|---|
| 1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
| 2 | 2 |
<classpath> |
| 3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> |
|
| 4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
| 5 |
<accessrules> |
|
| 6 |
<accessrule kind="accessible" pattern="**"/> |
|
| 7 |
</accessrules> |
|
| 8 |
</classpathentry> |
|
| 9 |
<classpathentry kind="src" path="src"/> |
|
| 10 |
<classpathentry kind="output" path="bin"/> |
|
| 3 |
<classpathentry kind="con" |
|
| 4 |
path="org.eclipse.jdt.launching.JRE_CONTAINER" /> |
|
| 5 |
<classpathentry kind="con" |
|
| 6 |
path="org.eclipse.pde.core.requiredPlugins"> |
|
| 7 |
<accessrules> |
|
| 8 |
<accessrule kind="accessible" pattern="**" /> |
|
| 9 |
</accessrules> |
|
| 10 |
</classpathentry> |
|
| 11 |
<classpathentry kind="src" path="src" /> |
|
| 12 |
<classpathentry kind="output" path="bin" /> |
|
| 11 | 13 |
</classpath> |
| tmp/org.txm.ca.core/.classpath (revision 2396) | ||
|---|---|---|
| 1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
| 2 | 2 |
<classpath> |
| 3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> |
|
| 4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
| 5 |
<accessrules> |
|
| 6 |
<accessrule kind="accessible" pattern="**"/> |
|
| 7 |
</accessrules> |
|
| 8 |
</classpathentry> |
|
| 9 |
<classpathentry kind="src" path="src"/> |
|
| 10 |
<classpathentry kind="output" path="bin"/> |
|
| 3 |
<classpathentry kind="con" |
|
| 4 |
path="org.eclipse.jdt.launching.JRE_CONTAINER" /> |
|
| 5 |
<classpathentry kind="con" |
|
| 6 |
path="org.eclipse.pde.core.requiredPlugins"> |
|
| 7 |
<accessrules> |
|
| 8 |
<accessrule kind="accessible" pattern="**" /> |
|
| 9 |
</accessrules> |
|
| 10 |
</classpathentry> |
|
| 11 |
<classpathentry kind="src" path="src" /> |
|
| 12 |
<classpathentry kind="output" path="bin" /> |
|
| 11 | 13 |
</classpath> |
| tmp/org.txm.ca.core/src/org/txm/ca/core/functions/package.html (revision 2396) | ||
|---|---|---|
| 1 | 1 |
<html> |
| 2 | 2 |
<body> |
| 3 |
<p>Correspondance analysis.</p> |
|
| 3 |
<p>Correspondance analysis.</p>
|
|
| 4 | 4 |
</body> |
| 5 | 5 |
</html> |
| tmp/org.txm.annotation.kr.core/.classpath (revision 2396) | ||
|---|---|---|
| 1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
| 2 | 2 |
<classpath> |
| 3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> |
|
| 4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
| 5 |
<accessrules> |
|
| 6 |
<accessrule kind="accessible" pattern="**"/> |
|
| 7 |
</accessrules> |
|
| 8 |
</classpathentry> |
|
| 9 |
<classpathentry kind="src" path="src"/> |
|
| 10 |
<classpathentry kind="lib" path="libs/hsqldb.jar"/> |
|
| 11 |
<classpathentry kind="lib" path="libs/postgresql-9.4.1207.jre6.jar"/> |
|
| 12 |
<classpathentry kind="lib" path="libs/sqlite-jdbc-3.8.11.2.jar"/> |
|
| 13 |
<classpathentry kind="output" path="bin"/> |
|
| 3 |
<classpathentry kind="con" |
|
| 4 |
path="org.eclipse.jdt.launching.JRE_CONTAINER" /> |
|
| 5 |
<classpathentry kind="con" |
|
| 6 |
path="org.eclipse.pde.core.requiredPlugins"> |
|
| 7 |
<accessrules> |
|
| 8 |
<accessrule kind="accessible" pattern="**" /> |
|
| 9 |
</accessrules> |
|
| 10 |
</classpathentry> |
|
| 11 |
<classpathentry kind="src" path="src" /> |
|
| 12 |
<classpathentry kind="lib" path="libs/hsqldb.jar" /> |
|
| 13 |
<classpathentry kind="lib" |
|
| 14 |
path="libs/postgresql-9.4.1207.jre6.jar" /> |
|
| 15 |
<classpathentry kind="lib" |
|
| 16 |
path="libs/sqlite-jdbc-3.8.11.2.jar" /> |
|
| 17 |
<classpathentry kind="output" path="bin" /> |
|
| 14 | 18 |
</classpath> |
| tmp/org.txm.annotation.kr.core/META-INF/persistence.xml (revision 2396) | ||
|---|---|---|
| 1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
| 2 |
<persistence version="2.1" xmlns="http://xmlns.jcp.org/xml/ns/persistence" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/persistence http://xmlns.jcp.org/xml/ns/persistence/persistence_2_1.xsd"> |
|
| 3 |
<persistence-unit name="HSQLKRPERSISTENCE" transaction-type="RESOURCE_LOCAL"> |
|
| 2 |
<persistence version="2.1" |
|
| 3 |
xmlns="http://xmlns.jcp.org/xml/ns/persistence" |
|
| 4 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
| 5 |
xsi:schemaLocation="http://xmlns.jcp.org/xml/ns/persistence http://xmlns.jcp.org/xml/ns/persistence/persistence_2_1.xsd"> |
|
| 6 |
<persistence-unit name="HSQLKRPERSISTENCE" |
|
| 7 |
transaction-type="RESOURCE_LOCAL"> |
|
| 4 | 8 |
<provider>org.eclipse.persistence.jpa.PersistenceProvider</provider> |
| 5 |
|
|
| 9 |
|
|
| 6 | 10 |
<class>org.txm.annotation.kr.core.Annotation</class> |
| 7 | 11 |
<class>org.txm.annotation.kr.core.repository.AnnotationType</class> |
| 8 | 12 |
<class>org.txm.annotation.kr.core.repository.TypedValue</class> |
| 9 |
<!-- |
|
| 10 |
<class>org.txm.functions.dictionary_jpa.EntryId</class> |
|
| 11 |
<class>org.txm.functions.dictionary_jpa.EntryProperty</class> |
|
| 12 |
<class>org.txm.functions.dictionary_jpa.EntryPropertyType</class> |
|
| 13 |
--> |
|
| 13 |
<!-- <class>org.txm.functions.dictionary_jpa.EntryId</class> <class>org.txm.functions.dictionary_jpa.EntryProperty</class> |
|
| 14 |
<class>org.txm.functions.dictionary_jpa.EntryPropertyType</class> --> |
|
| 14 | 15 |
<properties> |
| 15 |
<property name="javax.persistence.jdbc.driver" value="org.hsqldb.jdbcDriver"/> |
|
| 16 |
<property name="javax.persistence.jdbc.url" value="jdbc:hsqldb:file:txmdb/db;shutdown=true;hsqldb.write_delay=false;hsqldb.lock_file=false;hsqldb.applog=0;hsqldb.sqllog=0"/> |
|
| 17 |
<property name="javax.persistence.jdbc.user" value="SA"/> |
|
| 18 |
|
|
| 19 |
<property name="eclipselink.logging.level" value="OFF"/> |
|
| 20 |
<!-- |
|
| 21 |
<property name="eclipselink.jdbc.read-connections.min" value="1" /> |
|
| 22 |
<property name="eclipselink.jdbc.write-connections.min" value="1" /> |
|
| 23 |
<property name="eclipselink.jdbc.batch-writing" value="JDBC" /> |
|
| 24 |
--> |
|
| 16 |
<property name="javax.persistence.jdbc.driver" |
|
| 17 |
value="org.hsqldb.jdbcDriver" /> |
|
| 18 |
<property name="javax.persistence.jdbc.url" |
|
| 19 |
value="jdbc:hsqldb:file:txmdb/db;shutdown=true;hsqldb.write_delay=false;hsqldb.lock_file=false;hsqldb.applog=0;hsqldb.sqllog=0" /> |
|
| 20 |
<property name="javax.persistence.jdbc.user" value="SA" /> |
|
| 25 | 21 |
|
| 26 |
<!-- Logging |
|
| 27 |
<property name="eclipselink.logging.file" value="output.log"/> |
|
| 28 |
<property name="eclipselink.logging.logger" value="JavaLogger"/> |
|
| 22 |
<property name="eclipselink.logging.level" value="OFF" /> |
|
| 23 |
<!-- <property name="eclipselink.jdbc.read-connections.min" value="1" |
|
| 24 |
/> <property name="eclipselink.jdbc.write-connections.min" value="1" /> <property |
|
| 25 |
name="eclipselink.jdbc.batch-writing" value="JDBC" /> --> |
|
| 29 | 26 |
|
| 30 |
<property name="eclipselink.logging.parameters" value="false"/>
|
|
| 31 |
<property name="eclipselink.logging.level" value="FINE" />
|
|
| 32 |
<property name="eclipselink.logging.timestamp" value="false" />
|
|
| 33 |
<property name="eclipselink.logging.session" value="false" />
|
|
| 34 |
<property name="eclipselink.logging.thread" value="false" />
|
|
| 35 |
--> |
|
| 36 |
|
|
| 27 |
<!-- Logging <property name="eclipselink.logging.file" value="output.log"/>
|
|
| 28 |
<property name="eclipselink.logging.logger" value="JavaLogger"/> <property
|
|
| 29 |
name="eclipselink.logging.parameters" value="false"/> <property name="eclipselink.logging.level"
|
|
| 30 |
value="FINE" /> <property name="eclipselink.logging.timestamp" value="false"
|
|
| 31 |
/> <property name="eclipselink.logging.session" value="false" /> <property
|
|
| 32 |
name="eclipselink.logging.thread" value="false" /> -->
|
|
| 33 |
|
|
| 37 | 34 |
<!-- <property name="eclipselink.ddl-generation" value="drop-and-create-tables"/> --> |
| 38 | 35 |
<!-- <property name="eclipselink.ddl-generation.output-mode" value="database"/> --> |
| 39 | 36 |
</properties> |
| 40 |
|
|
| 37 |
|
|
| 41 | 38 |
</persistence-unit> |
| 42 | 39 |
</persistence> |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/DeleteAnnotation.java (revision 2396) | ||
|---|---|---|
| 5 | 5 |
import org.junit.Test; |
| 6 | 6 |
|
| 7 | 7 |
public class DeleteAnnotation {
|
| 8 |
|
|
| 8 |
|
|
| 9 | 9 |
@Test |
| 10 | 10 |
public void test() {
|
| 11 | 11 |
fail("Not yet implemented");
|
| 12 | 12 |
} |
| 13 |
|
|
| 13 |
|
|
| 14 | 14 |
} |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/UpdateAnnotation.java (revision 2396) | ||
|---|---|---|
| 5 | 5 |
import org.junit.Test; |
| 6 | 6 |
|
| 7 | 7 |
public class UpdateAnnotation {
|
| 8 |
|
|
| 8 |
|
|
| 9 | 9 |
@Test |
| 10 | 10 |
public void test() {
|
| 11 | 11 |
fail("Not yet implemented");
|
| 12 | 12 |
} |
| 13 |
|
|
| 13 |
|
|
| 14 | 14 |
} |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/CreateAnnotation.java (revision 2396) | ||
|---|---|---|
| 13 | 13 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
| 14 | 14 |
|
| 15 | 15 |
public class CreateAnnotation {
|
| 16 |
|
|
| 16 |
|
|
| 17 | 17 |
@Test |
| 18 | 18 |
public void test() throws CqiClientException, InvalidCqpIdException {
|
| 19 | 19 |
if (!Toolbox.isInitialized()) fail("Toolbox not initialized.");
|
| ... | ... | |
| 21 | 21 |
if (corpus == null) fail("Corpus Voeux not loaded.");
|
| 22 | 22 |
|
| 23 | 23 |
HashMap<String, Object> properties = TemporaryAnnotationManager.getInitialisationProperties(this.getClass(), corpus); |
| 24 |
properties.put("eclipselink.persistencexml", System.getProperty("user.home")+"/workspace442/org.txm.core/META-INF/persistence.xml");
|
|
| 24 |
properties.put("eclipselink.persistencexml", System.getProperty("user.home") + "/workspace442/org.txm.core/META-INF/persistence.xml");
|
|
| 25 | 25 |
|
| 26 | 26 |
TemporaryAnnotationManager tam = new TemporaryAnnotationManager(corpus, properties); |
| 27 | 27 |
System.out.println(tam); |
| 28 | 28 |
} |
| 29 |
|
|
| 29 |
|
|
| 30 | 30 |
} |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/temporary/AllTests.java (revision 2396) | ||
|---|---|---|
| 9 | 9 |
@SuiteClasses({ StartToolbox.class, CreateAnnotation.class, DeleteAnnotation.class,
|
| 10 | 10 |
UpdateAnnotation.class }) |
| 11 | 11 |
public class AllTests {
|
| 12 |
|
|
| 12 |
|
|
| 13 | 13 |
} |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationComparator.java (revision 2396) | ||
|---|---|---|
| 6 | 6 |
import java.util.StringTokenizer; |
| 7 | 7 |
|
| 8 | 8 |
public class AnnotationComparator implements Comparator<Annotation> {
|
| 9 |
|
|
| 9 |
|
|
| 10 | 10 |
public int compare(Annotation a1, Annotation a2) {
|
| 11 |
// comparer e1 et e2
|
|
| 11 |
// comparer e1 et e2
|
|
| 12 | 12 |
|
| 13 |
DateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd");
|
|
| 14 |
//System.out.println("Date 1 : "+a1.getDate() + " => "+dateformat.format(a1.getDate()));
|
|
| 15 |
//System.out.println("Date 2 : "+a2.getDate() + " => "+dateformat.format(a2.getDate()));
|
|
| 13 |
DateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd");
|
|
| 14 |
// System.out.println("Date 1 : "+a1.getDate() + " => "+dateformat.format(a1.getDate()));
|
|
| 15 |
// System.out.println("Date 2 : "+a2.getDate() + " => "+dateformat.format(a2.getDate()));
|
|
| 16 | 16 |
StringTokenizer tokenizer1 = new StringTokenizer(dateformat.format(a1.getDate()), "-"); |
| 17 | 17 |
StringTokenizer tokenizer2 = new StringTokenizer(dateformat.format(a2.getDate()), "-"); |
| 18 | 18 |
|
| 19 |
for(int i = 0 ; i < 2 && tokenizer1.hasMoreTokens() && tokenizer2.hasMoreTokens() ; ++i) {
|
|
| 19 |
for (int i = 0; i < 2 && tokenizer1.hasMoreTokens() && tokenizer2.hasMoreTokens(); ++i) {
|
|
| 20 | 20 |
String token1 = tokenizer1.nextToken(); |
| 21 | 21 |
String token2 = tokenizer2.nextToken(); |
| 22 | 22 |
int valint1 = new Integer(token1).intValue(); |
| 23 | 23 |
int valint2 = new Integer(token2).intValue(); |
| 24 |
//System.out.println(valint1+" | "+valint2); |
|
| 25 |
if (valint1<valint2){
|
|
| 24 |
// System.out.println(valint1+" | "+valint2);
|
|
| 25 |
if (valint1 < valint2) {
|
|
| 26 | 26 |
return -1; |
| 27 |
}else {
|
|
| 28 |
if(valint1>valint2){
|
|
| 27 |
} |
|
| 28 |
else {
|
|
| 29 |
if (valint1 > valint2) {
|
|
| 29 | 30 |
return 1; |
| 30 |
}else {
|
|
| 31 |
//System.out.println("idem ["+i+"]");
|
|
| 32 | 31 |
} |
| 32 |
else {
|
|
| 33 |
// System.out.println("idem ["+i+"]");
|
|
| 34 |
} |
|
| 33 | 35 |
|
| 34 | 36 |
} |
| 35 | 37 |
} |
| 36 | 38 |
|
| 37 | 39 |
return 0; |
| 38 |
|
|
| 39 |
}
|
|
| 40 |
|
|
| 41 |
} |
|
| 40 | 42 |
|
| 41 | 43 |
public static void main(String[] args) {
|
| 42 | 44 |
AnnotationComparator comp = new AnnotationComparator(); |
| 43 | 45 |
Annotation a1 = new Annotation("truc", "bidule", 3, 10);
|
| 44 | 46 |
Annotation a2 = new Annotation("truc", "bidule", 6, 8);
|
| 45 | 47 |
int ret = comp.compare(a1, a2); |
| 46 |
System.out.println("Le retour est : "+ret);
|
|
| 48 |
System.out.println("Le retour est : " + ret);
|
|
| 47 | 49 |
} |
| 48 | 50 |
} |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/DatabasePersistenceManager.java (revision 2396) | ||
|---|---|---|
| 5 | 5 |
import javax.persistence.EntityManager; |
| 6 | 6 |
|
| 7 | 7 |
public class DatabasePersistenceManager {
|
| 8 |
|
|
| 9 |
//Object can be Corpus or KnowledgeRepository |
|
| 8 |
|
|
| 9 |
// Object can be Corpus or KnowledgeRepository
|
|
| 10 | 10 |
protected HashMap<Object, EntityManager> managers; |
| 11 |
|
|
| 11 | 12 |
public static final String PERSISTENCE_UNIT_NAME = "HSQLKRPERSISTENCE"; |
| 13 |
|
|
| 12 | 14 |
public static String ACCESS_SQL = "sql"; |
| 15 |
|
|
| 13 | 16 |
public static String ACCESS_FILE = "file"; |
| 17 |
|
|
| 14 | 18 |
public static String ACCESS_SPARQL = "sparql"; |
| 15 |
|
|
| 19 |
|
|
| 16 | 20 |
/** |
| 17 | 21 |
* Instantiates a new database manager. |
| 18 | 22 |
*/ |
| 19 | 23 |
public DatabasePersistenceManager() {
|
| 20 |
managers = new HashMap<Object, EntityManager>() ;
|
|
| 24 |
managers = new HashMap<Object, EntityManager>(); |
|
| 21 | 25 |
} |
| 22 |
|
|
| 26 |
|
|
| 23 | 27 |
/** |
| 24 | 28 |
* The Object can be a Corpus or a KnowledgeRepository |
| 29 |
* |
|
| 25 | 30 |
* @param obj |
| 26 | 31 |
* @return |
| 27 | 32 |
*/ |
| 28 |
public EntityManager getJPAEntityManager(Object obj){
|
|
| 33 |
public EntityManager getJPAEntityManager(Object obj) {
|
|
| 29 | 34 |
if (managers.containsKey(obj)) {
|
| 30 | 35 |
return managers.get(obj); |
| 31 | 36 |
} |
| 32 | 37 |
return null; |
| 33 | 38 |
} |
| 34 |
|
|
| 39 |
|
|
| 35 | 40 |
public void closeManager(Object key) {
|
| 36 | 41 |
if (!managers.keySet().contains(key)) return; |
| 37 |
|
|
| 42 |
|
|
| 38 | 43 |
managers.get(key).close(); |
| 39 | 44 |
managers.remove(key); |
| 40 | 45 |
} |
| 41 |
|
|
| 46 |
|
|
| 42 | 47 |
public void closeAllManagers() {
|
| 43 | 48 |
for (Object key : managers.keySet()) {
|
| 44 | 49 |
EntityManager m = managers.get(key); |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationManager.java (revision 2396) | ||
|---|---|---|
| 25 | 25 |
* |
| 26 | 26 |
*/ |
| 27 | 27 |
public class AnnotationManager {
|
| 28 |
|
|
| 28 |
|
|
| 29 | 29 |
MainCorpus corpus; |
| 30 |
|
|
| 30 | 31 |
TemporaryAnnotationManager tempManager; |
| 32 |
|
|
| 31 | 33 |
CQPAnnotationManager cqpManager; |
| 34 |
|
|
| 32 | 35 |
boolean dirty = false; |
| 33 |
|
|
| 34 |
public AnnotationManager(MainCorpus mainCorpus){
|
|
| 36 |
|
|
| 37 |
public AnnotationManager(MainCorpus mainCorpus) {
|
|
| 35 | 38 |
this.corpus = mainCorpus; |
| 36 | 39 |
} |
| 37 |
|
|
| 38 |
public TemporaryAnnotationManager getTemporaryManager(){
|
|
| 40 |
|
|
| 41 |
public TemporaryAnnotationManager getTemporaryManager() {
|
|
| 39 | 42 |
return tempManager; |
| 40 | 43 |
} |
| 41 | 44 |
|
| 42 |
public CQPAnnotationManager getCQPManager(){
|
|
| 45 |
public CQPAnnotationManager getCQPManager() {
|
|
| 43 | 46 |
return cqpManager; |
| 44 | 47 |
} |
| 45 | 48 |
|
| ... | ... | |
| 56 | 59 |
monitor.beginTask(KRAnnotationCoreMessages.savingAnnotations, annots.size()); |
| 57 | 60 |
monitor.setTaskName("writing annotations in XML-TXM files");
|
| 58 | 61 |
} |
| 59 |
|
|
| 62 |
|
|
| 60 | 63 |
AnnotationWriter writer = new AnnotationWriter(corpus); |
| 61 | 64 |
if (writer.writeAnnotations(annots, monitor)) {
|
| 62 | 65 |
Log.info(KRAnnotationCoreMessages.annotationSuccesfullyWritten); |
| ... | ... | |
| 66 | 69 |
} |
| 67 | 70 |
return false; |
| 68 | 71 |
} |
| 69 |
|
|
| 70 |
public boolean exportAnnotationsToSyMoGIH(File resultZipFile) throws Exception{
|
|
| 72 |
|
|
| 73 |
public boolean exportAnnotationsToSyMoGIH(File resultZipFile) throws Exception {
|
|
| 71 | 74 |
AnnotationWriter writer = new AnnotationWriter(corpus); |
| 72 | 75 |
|
| 73 | 76 |
if (writer.writeAnnotationsInStandoff(resultZipFile)) {
|
| ... | ... | |
| 77 | 80 |
return false; |
| 78 | 81 |
} |
| 79 | 82 |
|
| 80 |
//TODO: not ended? |
|
| 83 |
// TODO: not ended?
|
|
| 81 | 84 |
/** |
| 82 | 85 |
* Deletes the annotations stored in the temporary annotation manager |
| 86 |
* |
|
| 83 | 87 |
* @param type |
| 84 | 88 |
* @param job |
| 85 | 89 |
* @return |
| ... | ... | |
| 91 | 95 |
try {
|
| 92 | 96 |
temporaryAnnotations = tempManager.getAnnotations(type); |
| 93 | 97 |
tempManager.getEntityManager().getTransaction().begin(); |
| 94 |
for (Annotation a : temporaryAnnotations){
|
|
| 98 |
for (Annotation a : temporaryAnnotations) {
|
|
| 95 | 99 |
if (job != null && job.isCanceled()) {
|
| 96 | 100 |
System.out.println("Delete annotation canceled.");
|
| 97 | 101 |
return false; |
| ... | ... | |
| 109 | 113 |
} |
| 110 | 114 |
String value = cqpManager.getCQPAnnotationValue(a.getStart(), a.getEnd(), type); |
| 111 | 115 |
if (value != null) {
|
| 112 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), a.getStart(), a.getEnd());
|
|
| 113 |
} else {
|
|
| 116 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), a.getStart(), a.getEnd());
|
|
| 117 |
} |
|
| 118 |
else {
|
|
| 114 | 119 |
tempManager.deleteAnnotationNoCommit(type, a.getStart(), a.getEnd()); |
| 115 | 120 |
} |
| 116 | 121 |
} |
| 117 | 122 |
dirty = true; |
| 118 | 123 |
tempManager.getEntityManager().getTransaction().commit(); |
| 119 |
} catch(Exception e) {
|
|
| 124 |
} |
|
| 125 |
catch (Exception e) {
|
|
| 120 | 126 |
e.printStackTrace(); |
| 121 | 127 |
return false; |
| 122 | 128 |
} |
| ... | ... | |
| 131 | 137 |
int start, end; |
| 132 | 138 |
if (match.getTarget() >= 0) {
|
| 133 | 139 |
start = end = match.getTarget(); |
| 134 |
} else {
|
|
| 140 |
} |
|
| 141 |
else {
|
|
| 135 | 142 |
start = match.getStart(); |
| 136 | 143 |
end = match.getEnd(); |
| 137 | 144 |
if (type.getEffect().equals(AnnotationEffect.TOKEN)) {
|
| ... | ... | |
| 145 | 152 |
} |
| 146 | 153 |
|
| 147 | 154 |
String value = cqpManager.getCQPAnnotationValue(start, end, type); |
| 148 |
|
|
| 155 |
|
|
| 149 | 156 |
if (value != null) {
|
| 150 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), start, end);
|
|
| 151 |
} else {
|
|
| 157 |
tempManager.createAnnotationNoCommit(type, new TypedValue("#del", "#del", type.getId()), start, end);
|
|
| 158 |
} |
|
| 159 |
else {
|
|
| 152 | 160 |
tempManager.deleteAnnotationNoCommit(type, start, end); |
| 153 | 161 |
} |
| 154 | 162 |
} |
| 155 | 163 |
dirty = true; |
| 156 | 164 |
tempManager.getEntityManager().getTransaction().commit(); |
| 157 |
} catch (Exception e) {
|
|
| 165 |
} |
|
| 166 |
catch (Exception e) {
|
|
| 158 | 167 |
e.printStackTrace(); |
| 159 | 168 |
return false; |
| 160 | 169 |
} |
| 161 | 170 |
return true; |
| 162 | 171 |
} |
| 163 |
|
|
| 172 |
|
|
| 164 | 173 |
/** |
| 165 | 174 |
* Returns the annotation saved in the temporary database and in the CQP corpus indexes |
| 166 | 175 |
* |
| ... | ... | |
| 174 | 183 |
temporaryAnnotations = tempManager.getAnnotationsForMatches(matches, temporaryAnnotations, overlap); |
| 175 | 184 |
|
| 176 | 185 |
List<? extends Annotation> cqpAnnotations = cqpManager.getAnnotationsForMatches(matches, type, overlap); |
| 177 |
|
|
| 178 |
// System.out.println("Temporary annotations: "+temporaryAnnotations);
|
|
| 179 |
// System.out.println("CQP annotations: "+cqpAnnotations);
|
|
| 186 |
|
|
| 187 |
// System.out.println("Temporary annotations: "+temporaryAnnotations);
|
|
| 188 |
// System.out.println("CQP annotations: "+cqpAnnotations);
|
|
| 180 | 189 |
if (cqpAnnotations.size() != matches.size() || temporaryAnnotations.size() != matches.size()) {
|
| 181 | 190 |
System.out.println("ERROR in getAnnotationsForMatches methods! ");
|
| 182 | 191 |
return new ArrayList<Annotation>(matches.size()); |
| 183 | 192 |
} |
| 184 | 193 |
// merge the 2 results |
| 185 |
for (int i = 0 ; i < matches.size() ; i++) {
|
|
| 194 |
for (int i = 0; i < matches.size(); i++) {
|
|
| 186 | 195 |
if (cqpAnnotations.get(i) == null && temporaryAnnotations.get(i) == null) {
|
| 187 | 196 |
resultAnnotations.add(null); |
| 188 |
} else if (temporaryAnnotations.get(i) != null) {
|
|
| 197 |
} |
|
| 198 |
else if (temporaryAnnotations.get(i) != null) {
|
|
| 189 | 199 |
resultAnnotations.add(temporaryAnnotations.get(i)); |
| 190 |
} else if (cqpAnnotations.get(i) != null) {
|
|
| 200 |
} |
|
| 201 |
else if (cqpAnnotations.get(i) != null) {
|
|
| 191 | 202 |
resultAnnotations.add(cqpAnnotations.get(i)); |
| 192 |
} else {
|
|
| 203 |
} |
|
| 204 |
else {
|
|
| 193 | 205 |
resultAnnotations.add(null); |
| 194 | 206 |
} |
| 195 | 207 |
} |
| 196 |
} catch (Exception e) {
|
|
| 208 |
} |
|
| 209 |
catch (Exception e) {
|
|
| 197 | 210 |
e.printStackTrace(); |
| 198 | 211 |
return new ArrayList<Annotation>(matches.size()); |
| 199 | 212 |
} |
| 200 | 213 |
return resultAnnotations; |
| 201 | 214 |
} |
| 202 |
|
|
| 215 |
|
|
| 203 | 216 |
public void clearInstance() {
|
| 204 | 217 |
try {
|
| 205 | 218 |
tempManager.close(); |
| 206 |
} catch(Exception e) {
|
|
| 207 |
System.out.println("Fail to clear AnnotationManager instance: "+e);
|
|
| 219 |
} |
|
| 220 |
catch (Exception e) {
|
|
| 221 |
System.out.println("Fail to clear AnnotationManager instance: " + e);
|
|
| 208 | 222 |
org.txm.utils.logger.Log.printStackTrace(e); |
| 209 | 223 |
} |
| 210 | 224 |
} |
| 211 |
|
|
| 225 |
|
|
| 212 | 226 |
public void checkData() {
|
| 213 | 227 |
try {
|
| 214 | 228 |
tempManager.checkData(); |
| 215 |
} catch (Exception e) {
|
|
| 229 |
} |
|
| 230 |
catch (Exception e) {
|
|
| 216 | 231 |
// TODO Auto-generated catch block |
| 217 | 232 |
e.printStackTrace(); |
| 218 | 233 |
} |
| 219 | 234 |
} |
| 220 |
|
|
| 235 |
|
|
| 221 | 236 |
public boolean hasChanges() {
|
| 222 | 237 |
return tempManager.hasChanges(); |
| 223 | 238 |
} |
| 224 |
|
|
| 239 |
|
|
| 225 | 240 |
/** |
| 226 | 241 |
* |
| 227 | 242 |
* @param annotSelectedType not null |
| ... | ... | |
| 230 | 245 |
* @param job may be null |
| 231 | 246 |
* @return |
| 232 | 247 |
*/ |
| 233 |
public HashMap<Match,List<Annotation>> createAnnotations(AnnotationType annotSelectedType, |
|
| 248 |
public HashMap<Match, List<Annotation>> createAnnotations(AnnotationType annotSelectedType,
|
|
| 234 | 249 |
TypedValue annotSelectedTypedValue, List<? extends Match> matches, IProgressMonitor job) {
|
| 235 |
|
|
| 236 |
HashMap<Match,List<Annotation>> allAnnotationsThatCollides = new HashMap<Match, List<Annotation>>(); |
|
| 250 |
|
|
| 251 |
HashMap<Match, List<Annotation>> allAnnotationsThatCollides = new HashMap<Match, List<Annotation>>();
|
|
| 237 | 252 |
tempManager.getEntityManager().getTransaction().begin(); // warning |
| 238 | 253 |
for (Match match : matches) {
|
| 239 | 254 |
allAnnotationsThatCollides.put(match, new ArrayList<Annotation>()); |
| ... | ... | |
| 241 | 256 |
int start, end; |
| 242 | 257 |
if (match.getTarget() >= 0) {
|
| 243 | 258 |
start = end = match.getTarget(); |
| 244 |
} else {
|
|
| 259 |
} |
|
| 260 |
else {
|
|
| 245 | 261 |
start = match.getStart(); |
| 246 | 262 |
end = match.getEnd(); |
| 247 | 263 |
} |
| ... | ... | |
| 256 | 272 |
if (annotSelectedType.getEffect().equals(AnnotationEffect.SEGMENT)) {
|
| 257 | 273 |
cqpAnnotations = cqpManager.getAnnotations(null, match, null, true); // get all annotations |
| 258 | 274 |
// remove A)the wrapping annotations and B) the annotation with same type and same positions |
| 259 |
for (int i = 0 ; i < cqpAnnotations.size() ; i++) {
|
|
| 275 |
for (int i = 0; i < cqpAnnotations.size(); i++) {
|
|
| 260 | 276 |
Annotation a = cqpAnnotations.get(i); |
| 261 |
|
|
| 277 |
|
|
| 262 | 278 |
// exact match + exact type |
| 263 | 279 |
if (a.getType().equals(annotSelectedType.getId()) && a.getStart() == start && a.getEnd() == end) {
|
| 264 | 280 |
cqpAnnotations.remove(i); |
| 265 | 281 |
i--; |
| 266 |
} else if (!a.getType().equals(annotSelectedType.getId()) && (// different type and inner or outer wrap
|
|
| 267 |
(a.getStart() <= start && end <= a.getEnd()) ||
|
|
| 268 |
(start <= a.getStart() && a.getEnd() <= end)
|
|
| 269 |
)) {
|
|
| 282 |
} |
|
| 283 |
else if (!a.getType().equals(annotSelectedType.getId()) && (// different type and inner or outer wrap
|
|
| 284 |
(a.getStart() <= start && end <= a.getEnd()) ||
|
|
| 285 |
(start <= a.getStart() && a.getEnd() <= end))) {
|
|
| 270 | 286 |
cqpAnnotations.remove(i); |
| 271 | 287 |
i--; |
| 272 | 288 |
} |
| 273 | 289 |
} |
| 274 |
} else {
|
|
| 290 |
} |
|
| 291 |
else {
|
|
| 275 | 292 |
// no need to test collision (AnnotationType=TOKEN) |
| 276 | 293 |
cqpAnnotations = new ArrayList<Annotation>(); |
| 277 | 294 |
} |
| ... | ... | |
| 279 | 296 |
if (cqpAnnotations.size() > 0) {
|
| 280 | 297 |
allAnnotationsThatCollides.get(match).addAll(cqpAnnotations); |
| 281 | 298 |
continue; // don't create annotation, process next match |
| 282 |
} else { // test with temporary annotation manager
|
|
| 299 |
} |
|
| 300 |
else { // test with temporary annotation manager
|
|
| 283 | 301 |
List<Annotation> tempAnnotations = null; |
| 284 |
if (annotSelectedType.getEffect() == AnnotationEffect.TOKEN) { // only annotate the first word
|
|
| 285 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, start, start); |
|
| 286 |
} else {
|
|
| 287 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, start, end); |
|
| 288 |
} |
|
| 302 |
if (annotSelectedType.getEffect() == AnnotationEffect.TOKEN) { // only annotate the first word
|
|
| 303 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, start, start); |
|
| 304 |
} |
|
| 305 |
else {
|
|
| 306 |
tempAnnotations = tempManager.createAnnotationNoCommit(annotSelectedType, annotSelectedTypedValue, start, end); |
|
| 307 |
} |
|
| 289 | 308 |
if (tempAnnotations.size() > 0) |
| 290 | 309 |
allAnnotationsThatCollides.get(match).addAll(tempAnnotations); |
| 291 | 310 |
} |
| 292 |
} catch (Exception e) {
|
|
| 311 |
} |
|
| 312 |
catch (Exception e) {
|
|
| 293 | 313 |
Log.printStackTrace(e); |
| 294 |
System.out.println("Error during annotation creation: "+e);
|
|
| 314 |
System.out.println("Error during annotation creation: " + e);
|
|
| 295 | 315 |
} |
| 296 | 316 |
|
| 297 | 317 |
if (allAnnotationsThatCollides.get(match).size() == 0) allAnnotationsThatCollides.remove(match); // keep only colision lists |
| ... | ... | |
| 300 | 320 |
tempManager.getEntityManager().getTransaction().commit(); // warning |
| 301 | 321 |
return allAnnotationsThatCollides; |
| 302 | 322 |
} |
| 303 |
|
|
| 323 |
|
|
| 304 | 324 |
public List<Annotation> getAnnotations(AnnotationType type, int start, int end, boolean overlap) {
|
| 305 | 325 |
List<Annotation> temporaryAnnotations = null; |
| 306 | 326 |
List<Annotation> cqpAnnotations = null; |
| ... | ... | |
| 315 | 335 |
} |
| 316 | 336 |
temporaryAnnotations.add(i, a); |
| 317 | 337 |
} |
| 318 |
} catch(Exception e) {
|
|
| 338 |
} |
|
| 339 |
catch (Exception e) {
|
|
| 319 | 340 |
|
| 320 | 341 |
} |
| 321 | 342 |
return temporaryAnnotations; |
| ... | ... | |
| 324 | 345 |
public List<Annotation> getAnnotations(AnnotationType type, int i, int j) {
|
| 325 | 346 |
return getAnnotations(type, i, j, false); |
| 326 | 347 |
} |
| 327 |
|
|
| 348 |
|
|
| 328 | 349 |
public void closeAll() {
|
| 329 |
Log.fine("Closing annotation manager of "+corpus);
|
|
| 350 |
Log.fine("Closing annotation manager of " + corpus);
|
|
| 330 | 351 |
tempManager.close(); |
| 331 | 352 |
cqpManager.close(); |
| 332 | 353 |
} |
| 333 |
|
|
| 354 |
|
|
| 334 | 355 |
public boolean isOpen() {
|
| 335 | 356 |
return tempManager.getEntityManager() != null && tempManager.getEntityManager().isOpen(); |
| 336 | 357 |
} |
| 337 |
|
|
| 338 |
|
|
| 358 |
|
|
| 359 |
|
|
| 339 | 360 |
public boolean initialize() throws Exception {
|
| 340 | 361 |
tempManager = new TemporaryAnnotationManager(corpus); |
| 341 | 362 |
dirty = tempManager.getAnnotations().size() > 0; |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationPK.java (revision 2396) | ||
|---|---|---|
| 6 | 6 |
|
| 7 | 7 |
@Embeddable |
| 8 | 8 |
public class AnnotationPK implements Serializable {
|
| 9 |
|
|
| 9 |
|
|
| 10 | 10 |
private static final long serialVersionUID = -2360693333015275209L; |
| 11 |
|
|
| 12 |
//corresponding to the start and end positions (in the corpus) |
|
| 11 |
|
|
| 12 |
// corresponding to the start and end positions (in the corpus)
|
|
| 13 | 13 |
private int startpos; |
| 14 |
|
|
| 14 | 15 |
private int endpos; |
| 16 |
|
|
| 15 | 17 |
private String refType; |
| 16 |
|
|
| 17 |
public AnnotationPK() {
|
|
| 18 |
} |
|
| 19 |
|
|
| 18 |
|
|
| 19 |
public AnnotationPK() {}
|
|
| 20 |
|
|
| 20 | 21 |
public AnnotationPK(int start, int end, String refType) {
|
| 21 | 22 |
this.startpos = start; |
| 22 | 23 |
this.endpos = end; |
| 23 | 24 |
this.refType = refType; |
| 24 | 25 |
} |
| 25 |
|
|
| 26 |
|
|
| 26 | 27 |
public void setRefType(String refType) {
|
| 27 | 28 |
this.refType = refType; |
| 28 | 29 |
} |
| 29 |
|
|
| 30 |
|
|
| 30 | 31 |
public void setStartPosition(int start) {
|
| 31 | 32 |
this.startpos = start; |
| 32 | 33 |
} |
| 33 |
|
|
| 34 |
|
|
| 34 | 35 |
public void setEndPosition(int end) {
|
| 35 | 36 |
this.endpos = end; |
| 36 | 37 |
} |
| 37 |
|
|
| 38 |
|
|
| 38 | 39 |
public String getRefType() {
|
| 39 | 40 |
return refType; |
| 40 | 41 |
} |
| 41 |
|
|
| 42 |
|
|
| 42 | 43 |
public int getStartPosition() {
|
| 43 | 44 |
return startpos; |
| 44 | 45 |
} |
| 45 |
|
|
| 46 |
|
|
| 46 | 47 |
public int getEndPosition() {
|
| 47 | 48 |
return endpos; |
| 48 | 49 |
} |
| 49 |
|
|
| 50 |
|
|
| 50 | 51 |
public boolean equals(Object obj) {
|
| 51 | 52 |
if (obj == null) return false; |
| 52 | 53 |
if (obj == this) return true; |
| 53 | 54 |
if (!(obj instanceof AnnotationPK)) return false; |
| 54 |
|
|
| 55 |
|
|
| 55 | 56 |
AnnotationPK other = (AnnotationPK) obj; |
| 56 |
return startpos == other.startpos && endpos == other.endpos&& refType.equals(other.refType); |
|
| 57 |
return startpos == other.startpos && endpos == other.endpos && refType.equals(other.refType);
|
|
| 57 | 58 |
} |
| 58 |
|
|
| 59 |
|
|
| 59 | 60 |
public int hashCode() {
|
| 60 |
return refType.hashCode()+startpos+endpos;
|
|
| 61 |
return refType.hashCode() + startpos + endpos;
|
|
| 61 | 62 |
} |
| 62 |
|
|
| 63 |
|
|
| 63 | 64 |
public String toString() {
|
| 64 |
return getRefType() + "["+getStartPosition()+"-"+getEndPosition()+"]" ;
|
|
| 65 |
return getRefType() + "[" + getStartPosition() + "-" + getEndPosition() + "]";
|
|
| 65 | 66 |
} |
| 66 | 67 |
} |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/CorpusRuledConvertion.java (revision 2396) | ||
|---|---|---|
| 16 | 16 |
import org.txm.utils.io.IOUtils; |
| 17 | 17 |
|
| 18 | 18 |
public class CorpusRuledConvertion {
|
| 19 |
|
|
| 19 |
|
|
| 20 | 20 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
| 21 |
|
|
| 21 | 22 |
private String oldType; |
| 23 |
|
|
| 22 | 24 |
private String newType; |
| 23 |
|
|
| 25 |
|
|
| 24 | 26 |
public CorpusRuledConvertion(File conversionFile, String oldType, String newType) throws IOException {
|
| 25 | 27 |
this.oldType = oldType; |
| 26 | 28 |
this.newType = newType; |
| 27 |
|
|
| 29 |
|
|
| 28 | 30 |
BufferedReader reader = IOUtils.getReader(conversionFile); |
| 29 | 31 |
String line = reader.readLine(); |
| 30 | 32 |
while (line != null) {
|
| 31 | 33 |
int idx = line.indexOf("\t");
|
| 32 | 34 |
if (idx > 0) {
|
| 33 | 35 |
String k = line.substring(0, idx); |
| 34 |
String v = line.substring(idx +1); |
|
| 36 |
String v = line.substring(idx + 1);
|
|
| 35 | 37 |
rules.put(Pattern.compile(k), v); |
| 36 | 38 |
} |
| 37 | 39 |
line = reader.readLine(); |
| 38 | 40 |
} |
| 39 |
|
|
| 40 |
System.out.println("Conversion rules: "+rules);
|
|
| 41 |
|
|
| 42 |
System.out.println("Conversion rules: " + rules);
|
|
| 41 | 43 |
} |
| 42 |
|
|
| 44 |
|
|
| 43 | 45 |
/**
 * Builds a conversion from an already built rule map.
 *
 * @param rules the pattern to value rules; the map is stored as is, not copied
 * @param oldType the old type name
 * @param newType the new type name
 */
public CorpusRuledConvertion(LinkedHashMap<Pattern, String> rules,
		String oldType, String newType) {
	this.rules = rules;
	this.newType = newType;
	this.oldType = oldType;
}
| 50 |
|
|
| 52 |
|
|
| 51 | 53 |
public boolean process(MainCorpus corpus) throws XMLStreamException, IOException {
|
| 52 | 54 |
File binaryCorpusDirectory = corpus.getProjectDirectory(); |
| 53 | 55 |
File txmDirectory = new File(binaryCorpusDirectory, "txm"); |
| 54 | 56 |
if (!txmDirectory.exists()) {
|
| 55 |
System.out.println("'txm' directory not found in "+binaryCorpusDirectory.getAbsolutePath());
|
|
| 57 |
System.out.println("'txm' directory not found in " + binaryCorpusDirectory.getAbsolutePath());
|
|
| 56 | 58 |
return false; |
| 57 | 59 |
} |
| 58 | 60 |
File txmCorpusDirectory = new File(txmDirectory, corpus.getID()); |
| 59 | 61 |
if (!txmCorpusDirectory.exists()) {
|
| 60 |
System.out.println("'"+corpus.getName()+"' corpus directory not found in "+txmDirectory.getAbsolutePath());
|
|
| 62 |
System.out.println("'" + corpus.getName() + "' corpus directory not found in " + txmDirectory.getAbsolutePath());
|
|
| 61 | 63 |
return false; |
| 62 | 64 |
} |
| 63 | 65 |
File[] files = txmCorpusDirectory.listFiles(IOUtils.HIDDENFILE_FILTER); |
| 64 | 66 |
if (files == null || files.length == 0) {
|
| 65 |
System.out.println("No file in "+txmCorpusDirectory);
|
|
| 67 |
System.out.println("No file in " + txmCorpusDirectory);
|
|
| 66 | 68 |
return false; |
| 67 | 69 |
} |
| 68 | 70 |
ConsoleProgressBar cpb = new ConsoleProgressBar(files.length); |
| ... | ... | |
| 71 | 73 |
if (xmlFile.isDirectory()) continue; |
| 72 | 74 |
if (xmlFile.isHidden()) continue; |
| 73 | 75 |
if (!xmlFile.getName().endsWith(".xml")) continue;
|
| 74 |
|
|
| 75 |
File tmpFile = new File(xmlFile.getParentFile(), "tmp_"+xmlFile.getName());
|
|
| 76 |
|
|
| 77 |
File tmpFile = new File(xmlFile.getParentFile(), "tmp_" + xmlFile.getName());
|
|
| 76 | 78 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, XMLTXMFileRuledConversion.ABANDON); |
| 77 | 79 |
if (converter.process(tmpFile)) {
|
| 78 | 80 |
xmlFile.delete(); |
| 79 |
// try {
|
|
| 80 |
// FileCopy.copy(tmpFile, new File("/tmp/"+tmpFile.getName()));
|
|
| 81 |
// } catch (IOException e) {
|
|
| 82 |
// // TODO Auto-generated catch block
|
|
| 83 |
// e.printStackTrace();
|
|
| 84 |
// }
|
|
| 85 |
|
|
| 81 |
// try {
|
|
| 82 |
// FileCopy.copy(tmpFile, new File("/tmp/"+tmpFile.getName()));
|
|
| 83 |
// } catch (IOException e) {
|
|
| 84 |
// // TODO Auto-generated catch block
|
|
| 85 |
// e.printStackTrace();
|
|
| 86 |
// }
|
|
| 87 |
|
|
| 86 | 88 |
HashSet<String> errors = converter.getNoMatchValues(); |
| 87 | 89 |
if (errors.size() > 0) {
|
| 88 | 90 |
System.out.println("Some values did not match rule:");
|
| 89 | 91 |
int i = 0; |
| 90 | 92 |
for (String error : errors) {
|
| 91 |
System.out.println("\t"+error);
|
|
| 93 |
System.out.println("\t" + error);
|
|
| 92 | 94 |
if (i >= 10) break; |
| 93 | 95 |
} |
| 94 | 96 |
if (errors.size() > 10) {
|
| 95 | 97 |
try {
|
| 96 | 98 |
File errorFile = new File(Toolbox.getTxmHomePath(), "errors.txt"); |
| 97 | 99 |
IOUtils.write(errorFile, StringUtils.join(errors, "\t")); |
| 98 |
System.out.println("More errors, see "+errorFile.getAbsolutePath());
|
|
| 99 |
} catch (Exception e) {
|
|
| 100 |
System.out.println("More errors, see " + errorFile.getAbsolutePath());
|
|
| 101 |
} |
|
| 102 |
catch (Exception e) {
|
|
| 100 | 103 |
e.printStackTrace(); |
| 101 | 104 |
} |
| 102 | 105 |
} |
| 103 | 106 |
return false; |
| 104 | 107 |
} |
| 105 |
|
|
| 108 |
|
|
| 106 | 109 |
tmpFile.renameTo(xmlFile); |
| 107 | 110 |
if (tmpFile.exists()) {
|
| 108 |
System.out.println("Could not replace original file with the result file. "+xmlFile+ " with "+tmpFile);
|
|
| 111 |
System.out.println("Could not replace original file with the result file. " + xmlFile + " with " + tmpFile);
|
|
| 109 | 112 |
return false; |
| 110 | 113 |
} |
| 111 |
} else {
|
|
| 112 |
System.out.println("Fail to process "+xmlFile);
|
|
| 114 |
} |
|
| 115 |
else {
|
|
| 116 |
System.out.println("Fail to process " + xmlFile);
|
|
| 113 | 117 |
return false; |
| 114 | 118 |
} |
| 115 | 119 |
} |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/XMLTXMFileRuledConversion.java (revision 2396) | ||
|---|---|---|
| 11 | 11 |
import org.txm.importer.StaxIdentityParser; |
| 12 | 12 |
|
| 13 | 13 |
public class XMLTXMFileRuledConversion extends StaxIdentityParser {
|
| 14 |
|
|
| 14 |
|
|
| 15 | 15 |
protected LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
| 16 |
|
|
| 16 | 17 |
protected String oldType; |
| 18 |
|
|
| 17 | 19 |
protected String newType; |
| 18 |
|
|
| 20 |
|
|
| 19 | 21 |
public static final String DELETE = "supprimer"; |
| 22 |
|
|
| 20 | 23 |
public static final String COPY = "copier"; |
| 24 |
|
|
| 21 | 25 |
public static final String ABANDON = "abandon"; |
| 26 |
|
|
| 22 | 27 |
HashSet<String> noMatchValues = new HashSet<String>(); |
| 23 | 28 |
|
| 24 | 29 |
public XMLTXMFileRuledConversion(File infile, LinkedHashMap<Pattern, String> rules, String oldType, String newType, String mode) throws IOException, XMLStreamException {
|
| ... | ... | |
| 29 | 34 |
|
| 30 | 35 |
this.mode = mode; |
| 31 | 36 |
|
| 32 |
if (!this.newType.startsWith("#")) this.newType = "#"+this.newType;
|
|
| 33 |
if (!this.oldType.startsWith("#")) this.oldType = "#"+this.oldType;
|
|
| 37 |
if (!this.newType.startsWith("#")) this.newType = "#" + this.newType;
|
|
| 38 |
if (!this.oldType.startsWith("#")) this.oldType = "#" + this.oldType;
|
|
| 34 | 39 |
} |
| 35 |
|
|
| 40 |
|
|
| 36 | 41 |
boolean inW = false, inAna = false, inForm; |
| 42 |
|
|
| 37 | 43 |
LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>(); |
| 44 |
|
|
| 38 | 45 |
LinkedHashMap<String, String> anaResps = new LinkedHashMap<String, String>(); |
| 46 |
|
|
| 39 | 47 |
String typeName = null; |
| 48 |
|
|
| 40 | 49 |
String respName = null; |
| 50 |
|
|
| 41 | 51 |
String formValue, typeValue = null; |
| 52 |
|
|
| 42 | 53 |
private Object mode; |
| 43 |
|
|
| 54 |
|
|
| 44 | 55 |
@Override |
| 45 | 56 |
public void processStartElement() throws XMLStreamException, IOException {
|
| 46 | 57 |
if (!inW) super.processStartElement(); // don't write W content |
| 47 |
|
|
| 58 |
|
|
| 48 | 59 |
if (localname.equals("w")) {
|
| 49 | 60 |
inW = true; |
| 50 | 61 |
anaValues.clear(); |
| 51 | 62 |
anaResps.clear(); |
| 52 |
|
|
| 53 |
//initialize the new type to a empty value in case there is transformation rule |
|
| 63 |
|
|
| 64 |
// initialize the new type to a empty value in case there is transformation rule
|
|
| 54 | 65 |
anaValues.put(newType, ""); |
| 55 | 66 |
anaResps.put(newType, "#txm_recode"); |
| 56 |
} else if (localname.equals("ana")) {
|
|
| 67 |
} |
|
| 68 |
else if (localname.equals("ana")) {
|
|
| 57 | 69 |
inAna = true; |
| 58 | 70 |
typeName = parser.getAttributeValue(null, "type"); |
| 59 | 71 |
respName = parser.getAttributeValue(null, "resp"); |
| 60 | 72 |
anaResps.put(typeName, respName); |
| 61 |
//if (typeName != null) typeName = typeName.substring(1); // remove # |
|
| 73 |
// if (typeName != null) typeName = typeName.substring(1); // remove #
|
|
| 62 | 74 |
typeValue = ""; |
| 63 |
} else if (localname.equals("form")) {
|
|
| 75 |
} |
|
| 76 |
else if (localname.equals("form")) {
|
|
| 64 | 77 |
inForm = true; |
| 65 | 78 |
formValue = ""; |
| 66 |
}
|
|
| 79 |
} |
|
| 67 | 80 |
} |
| 68 |
|
|
| 81 |
|
|
| 69 | 82 |
@Override |
| 70 | 83 |
public void processCharacters() throws XMLStreamException {
|
| 71 |
if (inW && inAna) typeValue+=parser.getText();
|
|
| 72 |
else if (inW && inForm) formValue+=parser.getText();
|
|
| 84 |
if (inW && inAna) typeValue += parser.getText();
|
|
| 85 |
else if (inW && inForm) formValue += parser.getText();
|
|
| 73 | 86 |
else super.processCharacters(); |
| 74 | 87 |
} |
| 75 |
|
|
| 88 |
|
|
| 76 | 89 |
@Override |
| 77 | 90 |
public void processEndElement() throws XMLStreamException {
|
| 78 | 91 |
if (localname.equals("w")) {
|
| 79 | 92 |
inW = false; |
| 80 |
|
|
| 93 |
|
|
| 81 | 94 |
// write W content |
| 82 | 95 |
try {
|
| 83 | 96 |
// get the value to test |
| 84 | 97 |
String value = null; |
| 85 | 98 |
if (oldType.equals("word")) {
|
| 86 | 99 |
value = formValue; |
| 87 |
} else {
|
|
| 100 |
} |
|
| 101 |
else {
|
|
| 88 | 102 |
value = anaValues.get(oldType); |
| 89 | 103 |
} |
| 90 | 104 |
|
| 91 | 105 |
if (newType.equals("word")) { // update form property
|
| 92 | 106 |
updateFormValueIfMatch(value); |
| 93 |
} else { // update another word property
|
|
| 107 |
} |
|
| 108 |
else { // update another word property
|
|
| 94 | 109 |
if (value != null) {
|
| 95 | 110 |
updateAnaValuesIfMatch(value); |
| 96 | 111 |
} |
| ... | ... | |
| 100 | 115 |
writer.writeStartElement("txm:form");
|
| 101 | 116 |
writer.writeCharacters(formValue); |
| 102 | 117 |
writer.writeEndElement(); |
| 103 |
|
|
| 118 |
|
|
| 104 | 119 |
for (String k : anaValues.keySet()) {
|
| 105 | 120 |
String resp = anaResps.get(k); |
| 106 | 121 |
if (resp == null) resp = "#txm_recode"; |
| 107 |
|
|
| 122 |
|
|
| 108 | 123 |
writer.writeStartElement("txm:ana");
|
| 109 | 124 |
writer.writeAttribute("resp", resp);
|
| 110 | 125 |
writer.writeAttribute("type", k);
|
| 111 | 126 |
writer.writeCharacters(anaValues.get(k)); |
| 112 | 127 |
writer.writeEndElement(); |
| 113 | 128 |
} |
| 114 |
} catch (XMLStreamException e) {
|
|
| 129 |
} |
|
| 130 |
catch (XMLStreamException e) {
|
|
| 115 | 131 |
e.printStackTrace(); |
| 116 | 132 |
} |
| 117 |
} else if (localname.equals("ana")) {
|
|
| 133 |
} |
|
| 134 |
else if (localname.equals("ana")) {
|
|
| 118 | 135 |
anaValues.put(typeName, typeValue); |
| 119 | 136 |
inAna = false; |
| 120 |
} else if (localname.equals("form")) {
|
|
| 137 |
} |
|
| 138 |
else if (localname.equals("form")) {
|
|
| 121 | 139 |
inForm = false; |
| 122 |
}
|
|
| 123 |
|
|
| 140 |
} |
|
| 141 |
|
|
| 124 | 142 |
if (!inW) super.processEndElement(); // don't write W content |
| 125 | 143 |
} |
| 126 |
|
|
| 144 |
|
|
| 127 | 145 |
protected void updateFormValueIfMatch(String value) {
|
| 128 | 146 |
for (Pattern rule : rules.keySet()) {
|
| 129 | 147 |
if (rule.matcher(value).matches()) {
|
| ... | ... | |
| 131 | 149 |
return; // ok stop |
| 132 | 150 |
} |
| 133 | 151 |
} |
| 134 |
|
|
| 152 |
|
|
| 135 | 153 |
noMatchValues.add(value); |
| 136 | 154 |
} |
| 137 |
|
|
| 155 |
|
|
| 138 | 156 |
protected void updateAnaValuesIfMatch(String value) {
|
| 139 | 157 |
for (Pattern rule : rules.keySet()) {
|
| 140 | 158 |
if (rule.matcher(value).matches()) {
|
| ... | ... | |
| 145 | 163 |
} |
| 146 | 164 |
} |
| 147 | 165 |
if (DELETE.equals(mode)) anaValues.put(newType, ""); // do a replace if newType == oldType :-) |
| 148 |
else if (ABANDON.equals(mode)) anaValues.put(newType, "ERROR("+value+")"); // do a replace if newType == oldType :-)
|
|
| 166 |
else if (ABANDON.equals(mode)) anaValues.put(newType, "ERROR(" + value + ")"); // do a replace if newType == oldType :-)
|
|
| 149 | 167 |
|
| 150 | 168 |
noMatchValues.add(value); |
| 151 | 169 |
} |
| ... | ... | |
| 153 | 171 |
public HashSet<String> getNoMatchValues() {
|
| 154 | 172 |
return noMatchValues; |
| 155 | 173 |
} |
| 156 |
|
|
| 174 |
|
|
| 157 | 175 |
public static void main(String args[]) {
|
| 158 | 176 |
try {
|
| 159 |
File xmlFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test.xml");
|
|
| 160 |
File tmpFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test-o.xml");
|
|
| 161 |
String oldType = "type"; |
|
| 162 |
String newType = "type"; |
|
| 163 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
| 164 |
rules.put(Pattern.compile("w"), "WORD");
|
|
| 165 |
rules.put(Pattern.compile("x.+"), "XWORD");
|
|
| 166 |
rules.put(Pattern.compile("y"), "YWORD");
|
|
| 167 |
rules.put(Pattern.compile("y.*"), "YMULTIWORD");
|
|
| 168 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, ABANDON); |
|
| 169 |
System.out.println(converter.process(tmpFile)); |
|
| 170 |
} catch (Exception e) {
|
|
| 177 |
File xmlFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test.xml");
|
|
| 178 |
File tmpFile = new File("/home/mdecorde/TXM/corpora/XTZTEXTUALPLANS/txm/XTZTEXTUALPLANS/test-o.xml");
|
|
| 179 |
String oldType = "type"; |
|
| 180 |
String newType = "type"; |
|
| 181 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); |
|
| 182 |
rules.put(Pattern.compile("w"), "WORD");
|
|
| 183 |
rules.put(Pattern.compile("x.+"), "XWORD");
|
|
| 184 |
rules.put(Pattern.compile("y"), "YWORD");
|
|
| 185 |
rules.put(Pattern.compile("y.*"), "YMULTIWORD");
|
|
| 186 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, oldType, newType, ABANDON); |
|
| 187 |
System.out.println(converter.process(tmpFile)); |
|
| 188 |
} |
|
| 189 |
catch (Exception e) {
|
|
| 171 | 190 |
e.printStackTrace(); |
| 172 | 191 |
} |
| 173 | 192 |
} |
| tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationWriter.java (revision 2396) | ||
|---|---|---|
| 33 | 33 |
|
| 34 | 34 |
|
| 35 | 35 |
public class AnnotationWriter {
|
| 36 |
|
|
| 36 |
|
|
| 37 | 37 |
MainCorpus corpus; |
| 38 |
|
|
| 38 | 39 |
private List<AnnotationType> types; |
| 40 |
|
|
| 39 | 41 |
private KnowledgeRepository defaultKR; |
| 40 |
|
|
| 41 |
public AnnotationWriter(MainCorpus corpus) throws BackingStoreException{
|
|
| 42 |
|
|
| 43 |
public AnnotationWriter(MainCorpus corpus) throws BackingStoreException {
|
|
| 42 | 44 |
this.corpus = corpus; |
| 43 | 45 |
List<String> krnames = KRAnnotationEngine.getKnowledgeRepositoryNames(corpus); |
| 44 | 46 |
if (krnames.size() == 0) {
|
| 45 | 47 |
Log.severe(NLS.bind("** Error: no knowledge repository found in {0} corpus.", corpus));
|
| 46 |
throw new IllegalArgumentException("No kr in "+corpus);
|
|
| 48 |
throw new IllegalArgumentException("No kr in " + corpus);
|
|
| 47 | 49 |
} |
| 48 | 50 |
String t = krnames.get(0); |
| 49 | 51 |
defaultKR = KRAnnotationEngine.getKnowledgeRepository(corpus, t); |
| 50 | 52 |
if (defaultKR == null) {
|
| 51 | 53 |
Log.severe(NLS.bind("** Error: no knowledge repository {0} found in {0} corpus.", defaultKR, corpus));
|
| 52 |
throw new IllegalArgumentException("No kr "+defaultKR+" in "+corpus);
|
|
| 54 |
throw new IllegalArgumentException("No kr " + defaultKR + " in " + corpus);
|
|
| 53 | 55 |
} |
| 54 | 56 |
types = defaultKR.getAllAnnotationTypes(); |
| 55 | 57 |
|
| 56 | 58 |
} |
| 57 |
|
|
| 59 |
|
|
| 58 | 60 |
/** |
| 59 | 61 |
* process a text to build standoff files |
| 60 | 62 |
* |
| ... | ... | |
| 67 | 69 |
* @throws CqiServerError |
| 68 | 70 |
* @throws CqiClientException |
| 69 | 71 |
* @throws InvalidCqpIdException |
| 70 |
* @throws XMLStreamException
|
|
| 72 |
* @throws XMLStreamException |
|
| 71 | 73 |
*/ |
| 72 |
protected boolean writeTextAnnotationToSyMoGIH(String textid, File currentXMLFile, File currentXMLStandoffFile, File xmlStandOffDirectory) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{
|
|
| 73 |
System.out.println(" text="+textid);
|
|
| 74 |
protected boolean writeTextAnnotationToSyMoGIH(String textid, File currentXMLFile, File currentXMLStandoffFile, File xmlStandOffDirectory) throws IOException, CqiServerError, CqiClientException, |
|
| 75 |
InvalidCqpIdException, XMLStreamException {
|
|
| 76 |
System.out.println(" text=" + textid);
|
|
| 74 | 77 |
boolean show_debug = Log.getLevel().intValue() < Level.INFO.intValue(); |
| 75 | 78 |
|
| 76 | 79 |
AnnotationSyMoGIHWriter annotationstdoff = new AnnotationSyMoGIHWriter(textid, currentXMLFile, xmlStandOffDirectory, types, show_debug); |
| 77 | 80 |
|
| 78 |
///rather test on the new xml standoff files |
|
| 81 |
/// rather test on the new xml standoff files
|
|
| 79 | 82 |
if (annotationstdoff.process(currentXMLStandoffFile)) {
|
| 80 |
if (ValidateXml.test(currentXMLStandoffFile)) { //TODO ALSO check if annotations are well-written
|
|
| 83 |
if (ValidateXml.test(currentXMLStandoffFile)) { // TODO ALSO check if annotations are well-written
|
|
| 81 | 84 |
return true; |
| 82 |
} else {
|
|
| 83 |
System.out.println("Error: result file "+currentXMLStandoffFile+" is malformed.");
|
|
| 84 | 85 |
} |
| 85 |
} else {
|
|
| 86 |
System.out.println("Error: while processing "+currentXMLStandoffFile+" in standoff dir");
|
|
| 86 |
else {
|
|
| 87 |
System.out.println("Error: result file " + currentXMLStandoffFile + " is malformed.");
|
|
| 88 |
} |
|
| 87 | 89 |
} |
| 90 |
else {
|
|
| 91 |
System.out.println("Error: while processing " + currentXMLStandoffFile + " in standoff dir");
|
|
| 92 |
} |
|
| 88 | 93 |
return false; |
| 89 | 94 |
|
| 90 | 95 |
} |
| ... | ... | |
| 98 | 103 |
* @throws CqiServerError |
| 99 | 104 |
* @throws CqiClientException |
| 100 | 105 |
* @throws InvalidCqpIdException |
| 101 |
* @throws XMLStreamException
|
|
| 106 |
* @throws XMLStreamException |
|
| 102 | 107 |
*/ |
| 103 |
public boolean writeAnnotationsInStandoff(File resultZipFile) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{
|
|
| 108 |
public boolean writeAnnotationsInStandoff(File resultZipFile) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException {
|
|
| 104 | 109 |
|
| 105 | 110 |
List<String> textsIds = Arrays.asList(corpus.getCorpusTextIdsList()); |
| 106 |
System.out.println("Exporting annotations of "+StringUtils.join(textsIds, ", ")+".");
|
|
| 107 |
|
|
| 108 |
File resultDirectory = new File(Toolbox.getTxmHomePath(), "results/"+corpus.getID()+"_annotations");
|
|
| 111 |
System.out.println("Exporting annotations of " + StringUtils.join(textsIds, ", ") + ".");
|
|
| 112 |
|
|
| 113 |
File resultDirectory = new File(Toolbox.getTxmHomePath(), "results/" + corpus.getID() + "_annotations");
|
|
| 109 | 114 |
DeleteDir.deleteDirectory(resultDirectory); |
| 110 | 115 |
resultDirectory.mkdirs(); |
| 111 | 116 |
if (!(resultDirectory.exists() && resultDirectory.canWrite())) {
|
| 112 |
System.out.println("ERROR: could not create/write temporary directory: "+resultDirectory);
|
|
| 117 |
System.out.println("ERROR: could not create/write temporary directory: " + resultDirectory);
|
|
| 113 | 118 |
return false; |
| 114 | 119 |
} |
| 115 | 120 |
|
| 116 | 121 |
File inputDirectory = corpus.getProjectDirectory(); |
| 117 |
File corpusTxmDirectory = new File(inputDirectory, "txm/"+corpus.getID());
|
|
| 122 |
File corpusTxmDirectory = new File(inputDirectory, "txm/" + corpus.getID());
|
|
| 118 | 123 |
|
| 119 |
System.out.println("Writing annotations XML files in "+resultDirectory);
|
|
| 120 |
for (String textid : textsIds) {
|
|
| 121 |
File currentXMLFile = new File(corpusTxmDirectory, textid+".xml");
|
|
| 124 |
System.out.println("Writing annotations XML files in " + resultDirectory);
|
|
| 125 |
for (String textid : textsIds) {
|
|
| 126 |
File currentXMLFile = new File(corpusTxmDirectory, textid + ".xml");
|
|
| 122 | 127 |
if (!currentXMLFile.exists()) {
|
| 123 |
System.out.println("Error: cannot found XML file for text with id="+textid);
|
|
| 128 |
System.out.println("Error: cannot found XML file for text with id=" + textid);
|
|
| 124 | 129 |
return false; |
| 125 | 130 |
} |
| 126 |
File currentXMLStandoffFile = new File(resultDirectory, textid+".xml"); //To Be Changed ?
|
|
| 127 |
if (!writeTextAnnotationToSyMoGIH(textid, currentXMLFile, currentXMLStandoffFile, resultDirectory )) {
|
|
| 128 |
System.out.println("Error while writing annotations of text "+currentXMLStandoffFile);
|
|
| 129 |
return false;
|
|
| 131 |
File currentXMLStandoffFile = new File(resultDirectory, textid + ".xml"); // To Be Changed ?
|
|
| 132 |
if (!writeTextAnnotationToSyMoGIH(textid, currentXMLFile, currentXMLStandoffFile, resultDirectory)) {
|
|
| 133 |
System.out.println("Error while writing annotations of text " + currentXMLStandoffFile);
|
|
| 134 |
return false; |
|
| 130 | 135 |
} |
| 131 | 136 |
} |
| 132 | 137 |
|
| 133 | 138 |
Zip.compress(resultDirectory, resultZipFile, new ConsoleProgressBar(1)); |
| 134 | 139 |
DeleteDir.deleteDirectory(resultDirectory); |
| 135 | 140 |
|
| 136 |
System.out.println("Annotations saved in "+resultZipFile.getAbsolutePath());
|
|
| 141 |
System.out.println("Annotations saved in " + resultZipFile.getAbsolutePath());
|
|
| 137 | 142 |
return resultZipFile.exists(); |
| 138 | 143 |
} |
| 139 | 144 |
|
| 140 | 145 |
/** |
| 141 | 146 |
* |
| 142 | 147 |
* @param allCorpusAnnotations ordered annotations |
| 143 |
* @param monitor
|
|
| 148 |
* @param monitor |
|
| 144 | 149 |
* @return |
| 145 | 150 |
* @throws IOException |
| 146 | 151 |
* @throws CqiServerError |
| 147 | 152 |
* @throws CqiClientException |
| 148 | 153 |
* @throws InvalidCqpIdException |
| 149 |
* @throws XMLStreamException
|
|
| 150 |
* @throws BackingStoreException
|
|
| 154 |
* @throws XMLStreamException |
|
| 155 |
* @throws BackingStoreException |
|
| 151 | 156 |
*/ |
| 152 |
public boolean writeAnnotations(List<Annotation> allCorpusAnnotations, IProgressMonitor monitor) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException, BackingStoreException{
|
|
| 153 |
//MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(c.getName()); |
|
| 157 |
public boolean writeAnnotations(List<Annotation> allCorpusAnnotations, IProgressMonitor monitor) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException, |
|
| 158 |
BackingStoreException {
|
|
| 159 |
// MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(c.getName()); |
|
| 154 | 160 |
Log.info(NLS.bind(KRAnnotationCoreMessages.savingP0Annotations, allCorpusAnnotations.size())); |
| 155 |
|
|
| 161 |
|
|
| 156 | 162 |
int[] end_limits = corpus.getTextEndLimits(); |
| 157 | 163 |
int[] start_limits = corpus.getTextStartLimits(); |
| 158 | 164 |
List<String> textsIds = Arrays.asList(corpus.getCorpusTextIdsList()); |
| 159 |
|
|
| 165 |
|
|
| 160 | 166 |
File inputDirectory = corpus.getProjectDirectory(); |
| 161 |
File txmDirectory = new File(inputDirectory, "txm/"+corpus.getID());
|
|
| 162 |
|
|
| 167 |
File txmDirectory = new File(inputDirectory, "txm/" + corpus.getID());
|
|
| 168 |
|
|
| 163 | 169 |
ArrayList<Annotation> textAnnotations = new ArrayList<Annotation>(); |
| 164 | 170 |
HashMap<String, ArrayList<Annotation>> annotationsPerTexts = new HashMap<String, ArrayList<Annotation>>(); |
| 165 |
|
|
| 171 |
|
|
| 166 | 172 |
int currentText = 0; |
| 167 |
File currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
|
|
| 168 |
|
|
| 169 |
//store first text |
|
| 170 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
|
|
| 173 |
File currentXMLFile = new File(txmDirectory, textsIds.get(currentText) + ".xml");
|
|
| 174 |
|
|
| 175 |
// store first text
|
|
| 176 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText) + ".xml");
|
|
| 171 | 177 |
textAnnotations = new ArrayList<Annotation>(); |
| 172 | 178 |
annotationsPerTexts.put(textsIds.get(currentText), textAnnotations); |
| 173 |
|
|
| 179 |
|
|
| 174 | 180 |
// group annotations per text |
| 175 | 181 |
for (Annotation currentAnnot : allCorpusAnnotations) { // parse all annotations
|
| 176 |
//System.out.println(" Annotation: "+currentAnnot);
|
|
| 182 |
// System.out.println(" Annotation: "+currentAnnot);
|
|
| 177 | 183 |
int pos = currentAnnot.getPK().getEndPosition(); |
| 178 |
|
|
| 179 |
while (pos > end_limits[currentText]) { // while pos is not in the currentText.end
|
|
| 184 |
|
|
| 185 |
while (pos > end_limits[currentText]) { // while pos is not in the currentText.end
|
|
| 180 | 186 |
currentText++; |
| 181 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
|
|
| 187 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText) + ".xml");
|
|
| 182 | 188 |
textAnnotations = new ArrayList<Annotation>(); |
| 183 | 189 |
annotationsPerTexts.put(textsIds.get(currentText), textAnnotations); |
| 184 | 190 |
} |
| 185 |
|
|
| 191 |
|
|
| 186 | 192 |
textAnnotations.add(currentAnnot); |
| 187 | 193 |
} |
| 188 |
|
|
| 189 |
File tmpXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_tmp");
|
|
| 194 |
|
|
| 195 |
File tmpXMLTXMDirectory = new File(txmDirectory.getAbsolutePath() + "_tmp");
|
|
| 190 | 196 |
DeleteDir.deleteDirectory(tmpXMLTXMDirectory); |
| 191 | 197 |
tmpXMLTXMDirectory.mkdirs(); |
| 192 | 198 |
if (!(tmpXMLTXMDirectory.exists() && tmpXMLTXMDirectory.canWrite())) {
|
| 193 |
System.out.println("ERROR: could not create directory: "+tmpXMLTXMDirectory);
|
|
| 199 |
System.out.println("ERROR: could not create directory: " + tmpXMLTXMDirectory);
|
|
| 194 | 200 |
return false; |
| 195 | 201 |
} |
| 196 |
|
|
| 197 |
File previousXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_previous");
|
|
| 198 |
//DeleteDir.deleteDirectory(tmpXMLTXMDirectory); |
|
| 202 |
|
|
| 203 |
File previousXMLTXMDirectory = new File(txmDirectory.getAbsolutePath() + "_previous");
|
|
| 204 |
// DeleteDir.deleteDirectory(tmpXMLTXMDirectory);
|
|
| 199 | 205 |
previousXMLTXMDirectory.mkdirs(); |
| 200 |
if (!(previousXMLTXMDirectory.exists() && previousXMLTXMDirectory.canWrite())) {
|
|
| 201 |
System.out.println("ERROR: could not create directory: "+previousXMLTXMDirectory);
|
|
| 206 |
if (!(previousXMLTXMDirectory.exists() && previousXMLTXMDirectory.canWrite())) {
|
|
| 207 |
System.out.println("ERROR: could not create directory: " + previousXMLTXMDirectory);
|
|
| 202 | 208 |
return false; |
| 203 | 209 |
} |
| 204 |
|
|
| 205 |
Log.fine("Annotations grouped per text for "+annotationsPerTexts.size()+" text"+(annotationsPerTexts.size() > 0?"s":""));
|
|
| 206 |
Log.fine(" - Writing temporary XML files in: "+tmpXMLTXMDirectory);
|
|
| 207 |
Log.fine(" - Copying previous version of XML files in: "+previousXMLTXMDirectory);
|
|
| 210 |
|
|
| 211 |
Log.fine("Annotations grouped per text for " + annotationsPerTexts.size() + " text" + (annotationsPerTexts.size() > 0 ? "s" : ""));
|
|
| 212 |
Log.fine(" - Writing temporary XML files in: " + tmpXMLTXMDirectory);
|
|
| 213 |
Log.fine(" - Copying previous version of XML files in: " + previousXMLTXMDirectory);
|
|
| 208 | 214 |
// for all annotation PER TEXT, update the XML-TXM files |
| 209 | 215 |
currentText = 0; |
| 210 |
while (currentText < end_limits.length ) { // end limits : 10, 30, 45, 55, 103
|
|
| 211 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText)+".xml");
|
|
| 216 |
while (currentText < end_limits.length) { // end limits : 10, 30, 45, 55, 103
|
|
| 217 |
currentXMLFile = new File(txmDirectory, textsIds.get(currentText) + ".xml");
|
|
| 212 | 218 |
ArrayList<Annotation> allAnnotations = annotationsPerTexts.get(textsIds.get(currentText)); |
| 213 | 219 |
if (allAnnotations != null && allAnnotations.size() > 0) {
|
| 214 | 220 |
ArrayList<Annotation> allSegmentAnnotations = new ArrayList<Annotation>(); |
| 215 | 221 |
ArrayList<Annotation> allTokenAnnotations = new ArrayList<Annotation>(); |
| 216 |
|
|
| 217 |
//System.out.println("Using KR="+defaultKR);
|
|
| 222 |
|
|
| 223 |
// System.out.println("Using KR="+defaultKR);
|
|
| 218 | 224 |
for (Annotation a : allAnnotations) {
|
| 219 | 225 |
AnnotationType type = defaultKR.getType(a.getType()); |
| 220 | 226 |
if (type != null) {
|
| 221 | 227 |
if (AnnotationEffect.SEGMENT.equals(type.getEffect())) {
|
| 222 | 228 |
allSegmentAnnotations.add(a); |
| 223 |
} else if (AnnotationEffect.TOKEN.equals(type.getEffect())) {
|
|
| 229 |
} |
|
| 230 |
else if (AnnotationEffect.TOKEN.equals(type.getEffect())) {
|
|
| 224 | 231 |
allTokenAnnotations.add(a); |
| 225 |
} else {
|
|
| 226 |
Log.fine("Annotation "+a+" with type="+a.getType()+" not found in default KR="+defaultKR);
|
|
| 227 | 232 |
} |
| 228 |
} else {
|
|
| 229 |
Log.warning("Warning: unknowed type: "+a.getType());
|
|
| 233 |
else {
|
|
| 234 |
Log.fine("Annotation " + a + " with type=" + a.getType() + " not found in default KR=" + defaultKR);
|
|
| 235 |
} |
|
| 230 | 236 |
} |
| 237 |
else {
|
|
| 238 |
Log.warning("Warning: unknowed type: " + a.getType());
|
|
| 239 |
} |
|
| 231 | 240 |
} |
| 232 |
|
|
| 233 |
if (!writeAnnotationsInFile(currentXMLFile, start_limits[currentText],
|
|
| 234 |
allSegmentAnnotations, allTokenAnnotations,
|
|
| 241 |
|
|
| 242 |
if (!writeAnnotationsInFile(currentXMLFile, start_limits[currentText], |
|
| 243 |
allSegmentAnnotations, allTokenAnnotations, |
|
| 235 | 244 |
tmpXMLTXMDirectory, previousXMLTXMDirectory)) {
|
| 236 |
Log.severe("Error while writing annotations of text "+currentXMLFile);
|
|
| 245 |
Log.severe("Error while writing annotations of text " + currentXMLFile);
|
|
| 237 | 246 |
return false; |
| 238 |
} else {
|
|
| 247 |
} |
|
| 248 |
else {
|
|
| 239 | 249 |
if (monitor != null) {
|
| 240 |
monitor.worked(allSegmentAnnotations.size()+allTokenAnnotations.size());
|
|
| 250 |
monitor.worked(allSegmentAnnotations.size() + allTokenAnnotations.size());
|
|
| 241 | 251 |
} |
| 242 | 252 |
} |
| 243 | 253 |
} |
| 244 | 254 |
currentText++; |
| 245 | 255 |
} |
| 246 |
|
|
| 256 |
|
|
| 247 | 257 |
return true; |
| 248 |
} |
|
| 249 |
|
|
| 250 |
//writeAnnotationInStandoffFile(currentXMLStandoffFile, allAnnotations, annotator, tmpXMLTXMDirectory, previousXMLTXMDirectory)) |
|
| 258 |
} |
|
| 251 | 259 |
|
| 252 |
protected boolean writeAnnotationsInFile(File xmlFile, int text_start_position, |
|
| 253 |
ArrayList<Annotation> segmentAnnotations, ArrayList<Annotation> tokenAnnotations, File tmpXMLTXMDirectory, File previousXMLTXMDirectory) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException, XMLStreamException{
|
|
| 254 |
Log.fine("Writing annotations for text "+xmlFile+" segment annotations="+segmentAnnotations.size()+" token annotations="+tokenAnnotations.size());
|
|
| 255 |
//System.out.println(segmentAnnotations); |
|
| 256 |
//System.out.println(tokenAnnotations); |
|
| 257 |
|
|
| 260 |
// writeAnnotationInStandoffFile(currentXMLStandoffFile, allAnnotations, annotator, tmpXMLTXMDirectory, previousXMLTXMDirectory)) |
|
| 261 |
|
|
| 262 |
protected boolean writeAnnotationsInFile(File xmlFile, int text_start_position, |
|
| 263 |
ArrayList<Annotation> segmentAnnotations, ArrayList<Annotation> tokenAnnotations, File tmpXMLTXMDirectory, File previousXMLTXMDirectory) throws CqiClientException, IOException, |
|
| 264 |
CqiServerError, InvalidCqpIdException, XMLStreamException {
|
|
| 265 |
Log.fine("Writing annotations for text " + xmlFile + " segment annotations=" + segmentAnnotations.size() + " token annotations=" + tokenAnnotations.size());
|
|
| 266 |
// System.out.println(segmentAnnotations); |
|
| 267 |
// System.out.println(tokenAnnotations); |
|
| 268 |
|
|
| 258 | 269 |
boolean show_debug = Log.getLevel().intValue() < Level.INFO.intValue(); |
| 259 | 270 |
AnnotationInjector annotationInjector = new AnnotationInjector(xmlFile, segmentAnnotations, tokenAnnotations, text_start_position, show_debug); |
| 260 |
|
|
| 271 |
|
|
| 261 | 272 |
File tmpfile = new File(tmpXMLTXMDirectory, xmlFile.getName()); |
| 262 | 273 |
File previousfile = new File(previousXMLTXMDirectory, xmlFile.getName()); |
| 263 |
|
|
| 274 |
|
|
| 264 | 275 |
if (annotationInjector.process(tmpfile)) {
|
| 265 |
if (ValidateXml.test(tmpfile)) { //TODO ALSO check if annotations are well-written
|
|
| 276 |
if (ValidateXml.test(tmpfile)) { // TODO ALSO check if annotations are well-written
|
|
| 266 | 277 |
previousfile.delete(); // in case there is one |
| 267 | 278 |
if (!previousfile.exists() && xmlFile.renameTo(previousfile)) {
|
| 268 | 279 |
tmpfile.renameTo(xmlFile); |
| 269 | 280 |
return true; |
| 270 |
} else {
|
|
| 271 |
Log.severe("Error: could not replace XML-TXM file: "+xmlFile+" with "+tmpfile);
|
|
| 272 | 281 |
} |
| 273 |
} else {
|
|
| 274 |
Log.severe("Error: result file "+tmpfile+" is malformed.");
|
|
| 282 |
else {
|
|
| 283 |
Log.severe("Error: could not replace XML-TXM file: " + xmlFile + " with " + tmpfile);
|
|
| 284 |
} |
|
| 275 | 285 |
} |
| 276 |
} else {
|
|
| 277 |
Log.severe("Error: while processing "+xmlFile+" in temp dir");
|
|
Formats disponibles : Unified diff