Revision 911
tmp/org.txm.wordcloud.feature/feature.xml (revision 911) | ||
---|---|---|
67 | 67 |
</license> |
68 | 68 |
|
69 | 69 |
<requires> |
70 |
<import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/> |
|
71 |
|
|
72 |
<import plugin="org.txm.utils" version="1.0.0" match="greaterOrEqual"/> |
|
73 |
<import plugin="org.eclipse.core.runtime" version="3.10.0" match="greaterOrEqual"/> |
|
74 |
<import plugin="org.txm.chartsengine.core" version="1.0.0" match="greaterOrEqual"/> |
|
75 |
<import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/> |
|
76 |
<import plugin="org.txm.statsengine.r.core" version="1.0.0" match="greaterOrEqual"/> |
|
77 | 70 |
<import plugin="org.txm.chartsengine.jfreechart.core" version="1.0.0" match="greaterOrEqual"/> |
78 |
<import plugin="org.txm.statsengine.core" version="1.0.0" match="greaterOrEqual"/> |
|
79 | 71 |
<import plugin="org.txm.chartsengine.r.core" version="1.0.0" match="greaterOrEqual"/> |
80 | 72 |
<import plugin="org.txm.index.core" version="1.0.0" match="greaterOrEqual"/> |
81 |
<import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/> |
|
82 |
<import plugin="org.eclipse.osgi" version="3.10.2" match="greaterOrEqual"/> |
|
83 | 73 |
<import plugin="org.txm.index.rcp" version="1.0.0" match="greaterOrEqual"/> |
84 |
<import plugin="org.eclipse.ui" version="3.106.1" match="greaterOrEqual"/> |
|
85 | 74 |
<import plugin="org.txm.wordcloud.core" version="1.0.0" match="greaterOrEqual"/> |
86 | 75 |
<import plugin="org.txm.chartsengine.rcp"/> |
87 | 76 |
</requires> |
tmp/org.txm.backtomedia.feature/feature.xml (revision 911) | ||
---|---|---|
49 | 49 |
</license> |
50 | 50 |
|
51 | 51 |
<requires> |
52 |
<import plugin="org.eclipse.core.runtime"/> |
|
53 |
<import plugin="org.eclipse.ui"/> |
|
54 |
<import plugin="org.eclipse.core.expressions" version="3.4.500" match="greaterOrEqual"/> |
|
55 |
<import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/> |
|
56 |
<import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/> |
|
57 |
<import plugin="org.txm.concordance.core" version="1.0.0" match="greaterOrEqual"/> |
|
58 | 52 |
<import plugin="org.txm.concordance.rcp" version="1.0.0" match="greaterOrEqual"/> |
59 | 53 |
</requires> |
60 | 54 |
|
tmp/org.txm.wordcloud.rcp/META-INF/MANIFEST.MF (revision 911) | ||
---|---|---|
1 | 1 |
Manifest-Version: 1.0 |
2 |
Require-Bundle: org.txm.rcp;bundle-version="0.7.8";visibility:=reexpor |
|
3 |
t,org.txm.utils;bundle-version="1.0.0";visibility:=reexport,org.eclip |
|
4 |
se.osgi;bundle-version="3.10.2";visibility:=reexport,org.txm.index.rc |
|
5 |
p;bundle-version="1.0.0";visibility:=reexport,org.eclipse.core.runtim |
|
6 |
e;bundle-version="3.10.0";visibility:=reexport,org.txm.chartsengine.c |
|
7 |
ore;bundle-version="1.0.0";visibility:=reexport,org.txm.index.core;bu |
|
8 |
ndle-version="1.0.0";visibility:=reexport,org.txm.searchengine.cqp.co |
|
9 |
re;bundle-version="1.1.0";visibility:=reexport,org.eclipse.ui;bundle- |
|
10 |
version="3.106.1";visibility:=reexport,org.txm.wordcloud.core;bundle- |
|
11 |
version="1.0.0";visibility:=reexport,org.txm.chartsengine.rcp;visibil |
|
12 |
ity:=reexport,org.txm.core;bundle-version="0.7.0";visibility:=reexpor |
|
13 |
t |
|
2 |
Require-Bundle: org.txm.index.rcp;bundle-version="1.0.0";visibility:=reexport, |
|
3 |
org.txm.wordcloud.core;bundle-version="1.0.0";visibility:=reexport, |
|
4 |
org.txm.chartsengine.rcp;visibility:=reexport |
|
14 | 5 |
Bundle-Vendor: Textometrie.org |
15 | 6 |
Bundle-ActivationPolicy: lazy |
16 | 7 |
Bundle-Version: 1.0.0.qualifier |
tmp/org.txm.textsbalance.rcp/META-INF/MANIFEST.MF (revision 911) | ||
---|---|---|
1 | 1 |
Manifest-Version: 1.0 |
2 |
Require-Bundle: org.txm.searchengine.cqp.core;bundle-version="1.1.0";v |
|
3 |
isibility:=reexport,org.txm.searchengine.core;bundle-version="1.0.0"; |
|
4 |
visibility:=reexport,org.txm.rcp;bundle-version="0.7.7";visibility:=r |
|
5 |
eexport,org.eclipse.core.runtime;bundle-version="3.10.0";visibility:= |
|
6 |
reexport,org.eclipse.ui;bundle-version="3.106.1";visibility:=reexport |
|
7 |
,org.eclipse.jface.text;visibility:=reexport,org.eclipse.ui.editors;v |
|
8 |
isibility:=reexport,org.txm.core;bundle-version="0.7.0";visibility:=r |
|
9 |
eexport,org.txm.chartsengine.r.core;visibility:=reexport,org.eclipse. |
|
10 |
core.expressions;bundle-version="3.4.600";visibility:=reexport,org.tx |
|
11 |
m.textsbalance.core;bundle-version="1.0.0";visibility:=reexport,org.t |
|
12 |
xm.chartsengine.jfreechart.core;bundle-version="1.0.0";visibility:=re |
|
13 |
export,org.txm.chartsengine.rcp;bundle-version="1.0.0";visibility:=re |
|
14 |
export |
|
2 |
Require-Bundle: org.txm.textsbalance.core;bundle-version="1.0.0";visibility:=reexport, |
|
3 |
org.txm.chartsengine.rcp;bundle-version="1.0.0";visibility:=reexport |
|
15 | 4 |
Export-Package: org.txm.textsbalance.rcp.adapters,org.txm.textsbalance |
16 | 5 |
.rcp.editors,org.txm.textsbalance.rcp.handlers,org.txm.textsbalance.r |
17 | 6 |
cp.preferences |
tmp/org.txm.analec.rcp/src/org/txm/analec/imports/AnalecAnnotationsImporter.java (revision 911) | ||
---|---|---|
18 | 18 |
import org.apache.commons.lang.StringUtils; |
19 | 19 |
import org.eclipse.core.runtime.IProgressMonitor; |
20 | 20 |
import org.txm.Toolbox; |
21 |
import org.txm.importer.graal.PersonalNamespaceContext;
|
|
21 |
import org.txm.importer.PersonalNamespaceContext; |
|
22 | 22 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
23 | 23 |
import org.txm.searchengine.cqp.CQPSearchEngine; |
24 | 24 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
tmp/org.txm.analec.rcp/src/org/txm/analec/imports/DOMAnalecAnnotationsImporter.java (revision 911) | ||
---|---|---|
8 | 8 |
|
9 | 9 |
import org.eclipse.core.runtime.IProgressMonitor; |
10 | 10 |
import org.txm.Toolbox; |
11 |
import org.txm.importer.graal.PersonalNamespaceContext;
|
|
11 |
import org.txm.importer.PersonalNamespaceContext; |
|
12 | 12 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
13 | 13 |
import org.txm.searchengine.cqp.CQPSearchEngine; |
14 | 14 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
tmp/org.txm.analec.rcp/src/org/txm/analec/export/AnalecAnnotationTEIExporter.java (revision 911) | ||
---|---|---|
17 | 17 |
import org.txm.Toolbox; |
18 | 18 |
import org.txm.importer.StaxIdentityParser; |
19 | 19 |
import org.txm.importer.StaxStackWriter; |
20 |
import org.txm.importer.graal.PersonalNamespaceContext;
|
|
20 |
import org.txm.importer.PersonalNamespaceContext; |
|
21 | 21 |
import org.txm.objects.BaseParameters; |
22 | 22 |
import org.txm.rcp.Application; |
23 | 23 |
import org.txm.rcp.TxmPreferences; |
tmp/org.txm.dictionary.feature/feature.xml (revision 911) | ||
---|---|---|
17 | 17 |
</license> |
18 | 18 |
|
19 | 19 |
<requires> |
20 |
<import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/> |
|
21 |
<import plugin="org.txm.libs.groovy-all" version="2.3.3" match="greaterOrEqual"/> |
|
22 |
<import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/> |
|
23 |
<import plugin="org.eclipse.ui"/> |
|
24 |
<import plugin="org.eclipse.core.runtime"/> |
|
25 |
<import plugin="org.txm.utils"/> |
|
26 |
<import plugin="org.eclipse.persistence.jpa" version="2.6.0" match="greaterOrEqual"/> |
|
27 |
<import plugin="org.txm.index.core" version="1.0.0" match="greaterOrEqual"/> |
|
28 | 20 |
<import plugin="org.txm.index.rcp" version="1.0.0" match="greaterOrEqual"/> |
29 |
<import plugin="javax.persistence"/> |
|
30 | 21 |
<import plugin="org.txm.annotation.kr.core" version="1.0.0" match="greaterOrEqual"/> |
31 | 22 |
</requires> |
32 | 23 |
|
tmp/org.txm.para.rcp/.classpath (revision 911) | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 | 2 |
<classpath> |
3 | 3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/> |
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
5 |
<accessrules> |
|
6 |
<accessrule kind="accessible" pattern="**"/> |
|
7 |
</accessrules> |
|
8 |
</classpathentry> |
|
5 | 9 |
<classpathentry kind="src" path="src"/> |
6 | 10 |
<classpathentry kind="output" path="bin"/> |
7 | 11 |
</classpath> |
tmp/org.txm.para.rcp/META-INF/MANIFEST.MF (revision 911) | ||
---|---|---|
1 | 1 |
Manifest-Version: 1.0 |
2 |
Require-Bundle: org.txm.utils;bundle-version="1.0.0";visibility:=reexp |
|
3 |
ort,org.eclipse.core.runtime;bundle-version="3.10.0";visibility:=reex |
|
4 |
port,org.eclipse.osgi;bundle-version="3.10.2";visibility:=reexport,or |
|
5 |
g.eclipse.ui;visibility:=reexport,org.txm.concordance.rcp;visibility: |
|
6 |
=reexport,org.txm.searchengine.cqp.core;bundle-version="1.1.0";visibi |
|
7 |
lity:=reexport,org.txm.rcp;bundle-version="0.7.8";visibility:=reexpor |
|
8 |
t,org.txm.core;bundle-version="0.7.0";visibility:=reexport,org.txm.pa |
|
9 |
ra.core;visibility:=reexport,org.txm.searchengine.core;bundle-version |
|
10 |
="1.0.0";visibility:=reexport |
|
2 |
Require-Bundle: org.txm.concordance.rcp;visibility:=reexport, |
|
3 |
org.txm.para.core;visibility:=reexport |
|
11 | 4 |
Export-Package: org.txm.para.rcp.editors, |
12 | 5 |
org.txm.para.rcp.handlers, |
13 | 6 |
org.txm.para.rcp.messages |
tmp/org.txm.textsbalance.feature/feature.xml (revision 911) | ||
---|---|---|
65 | 65 |
</license> |
66 | 66 |
|
67 | 67 |
<requires> |
68 |
<import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/> |
|
69 |
<import plugin="org.txm.searchengine.core" version="1.0.0" match="greaterOrEqual"/> |
|
70 |
<import plugin="org.txm.rcp" version="0.7.7" match="greaterOrEqual"/> |
|
71 |
<import plugin="org.eclipse.core.runtime" version="3.10.0" match="greaterOrEqual"/> |
|
72 |
<import plugin="org.eclipse.ui" version="3.106.1" match="greaterOrEqual"/> |
|
73 |
<import plugin="org.eclipse.jface.text"/> |
|
74 |
<import plugin="org.eclipse.ui.editors"/> |
|
75 |
<import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/> |
|
76 |
<import plugin="org.txm.chartsengine.r.core"/> |
|
77 |
<import plugin="org.eclipse.core.expressions" version="3.4.600" match="greaterOrEqual"/> |
|
78 | 68 |
<import plugin="org.txm.textsbalance.core" version="1.0.0" match="greaterOrEqual"/> |
79 |
<import plugin="org.txm.chartsengine.jfreechart.core" version="1.0.0" match="greaterOrEqual"/> |
|
80 | 69 |
<import plugin="org.txm.chartsengine.rcp" version="1.0.0" match="greaterOrEqual"/> |
70 |
<import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/> |
|
71 |
<import plugin="org.txm.chartsengine.jfreechart.core" version="1.0.0" match="greaterOrEqual"/> |
|
72 |
<import plugin="org.txm.chartsengine.r.core" version="1.0.0" match="greaterOrEqual"/> |
|
81 | 73 |
</requires> |
82 | 74 |
|
83 | 75 |
<plugin |
tmp/org.txm.oriflamms.rcp/META-INF/MANIFEST.MF (revision 911) | ||
---|---|---|
1 | 1 |
Manifest-Version: 1.0 |
2 |
Require-Bundle: org.txm.core;bundle-version="0.7.0";visibility:=reexpo |
|
3 |
rt,org.txm.rcp;visibility:=reexport,org.eclipse.ui;visibility:=reexpo |
|
4 |
rt,org.eclipse.core.runtime;visibility:=reexport,org.txm.searchengine |
|
5 |
.cqp.core;visibility:=reexport,org.txm.utils;visibility:=reexport |
|
2 |
Require-Bundle: org.txm.rcp;visibility:=reexport, |
|
3 |
org.txm.searchengine.cqp.rcp;bundle-version="1.0.0" |
|
6 | 4 |
Bundle-Vendor: Textometrie.org |
7 | 5 |
Bundle-ActivationPolicy: lazy |
8 | 6 |
Bundle-Version: 1.0.0.qualifier |
tmp/org.txm.analec.feature/feature.xml (revision 911) | ||
---|---|---|
67 | 67 |
</license> |
68 | 68 |
|
69 | 69 |
<requires> |
70 |
<import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/> |
|
71 |
<import plugin="org.txm.searchengine.core" version="1.0.0" match="greaterOrEqual"/> |
|
72 | 70 |
<import plugin="org.txm.concordance.rcp" version="1.0.0" match="greaterOrEqual"/> |
73 |
<import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/> |
|
74 |
<import plugin="org.eclipse.ui"/> |
|
75 |
<import plugin="org.eclipse.core.runtime"/> |
|
76 |
<import plugin="org.txm.rcp" version="0.7.5" match="greaterOrEqual"/> |
|
77 |
<import plugin="org.eclipse.ui.browser" version="3.4.100" match="greaterOrEqual"/> |
|
78 |
<import plugin="org.eclipse.jface.databinding" version="1.6.200" match="greaterOrEqual"/> |
|
79 |
<import plugin="org.eclipse.jface.text" version="3.9.2" match="greaterOrEqual"/> |
|
80 |
<import plugin="org.txm.links.rcp" version="1.0.0" match="greaterOrEqual"/> |
|
81 |
<import plugin="org.eclipse.jface"/> |
|
82 |
<import plugin="org.eclipse.swt"/> |
|
83 |
<import plugin="org.eclipse.core.expressions" version="3.4.600" match="greaterOrEqual"/> |
|
84 |
<import plugin="org.txm.core" version="0.8.0" match="greaterOrEqual"/> |
|
85 |
<import plugin="org.txm.rcp" version="0.8.0" match="greaterOrEqual"/> |
|
86 |
<import plugin="org.txm.utils"/> |
|
87 | 71 |
<import plugin="org.txm.progression.rcp" version="1.0.0" match="greaterOrEqual"/> |
88 |
<import plugin="org.txm.chartsengine.core" version="1.0.0" match="greaterOrEqual"/> |
|
89 | 72 |
<import plugin="org.txm.chartsengine.jfreechart.rcp" version="1.0.0" match="greaterOrEqual"/> |
90 | 73 |
<import plugin="org.txm.chartsengine.r.rcp" version="1.0.0" match="greaterOrEqual"/> |
91 |
<import plugin="org.txm.lexicaltable.rcp" version="1.0.0" match="greaterOrEqual"/> |
|
92 |
<import plugin="org.txm.edition.rcp" version="1.0.0" match="greaterOrEqual"/> |
|
93 | 74 |
<import plugin="org.txm.annotation.rcp"/> |
94 | 75 |
</requires> |
95 | 76 |
|
tmp/org.txm.treetagger.files.feature/feature.xml (revision 911) | ||
---|---|---|
69 | 69 |
</license> |
70 | 70 |
|
71 | 71 |
<requires> |
72 |
<import plugin="org.txm.utils"/> |
|
73 |
<import plugin="org.eclipse.core.runtime" version="3.10.0" match="greaterOrEqual"/> |
|
74 | 72 |
<import plugin="org.txm.core" version="0.8.0" match="greaterOrEqual"/> |
75 | 73 |
<import plugin="org.txm.treetagger.core" version="1.0.0" match="greaterOrEqual"/> |
76 |
<import plugin="org.eclipse.ui"/> |
|
77 |
<import plugin="org.eclipse.swt"/> |
|
78 |
<import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/> |
|
79 | 74 |
<import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/> |
80 |
<import plugin="org.txm.libs.groovy-all" version="2.3.3" match="greaterOrEqual"/> |
|
81 |
<import plugin="org.txm.utils" version="1.0.0" match="greaterOrEqual"/> |
|
82 | 75 |
</requires> |
83 | 76 |
|
84 | 77 |
<plugin |
tmp/org.txm.tigersearch.feature/feature.xml (revision 911) | ||
---|---|---|
17 | 17 |
</license> |
18 | 18 |
|
19 | 19 |
<requires> |
20 |
<import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/> |
|
21 |
<import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/> |
|
22 |
<import plugin="org.eclipse.ui"/> |
|
23 |
<import plugin="org.eclipse.core.runtime"/> |
|
24 |
<import plugin="org.eclipse.ui.editors" version="3.8.200" match="greaterOrEqual"/> |
|
25 |
<import plugin="org.eclipse.swt"/> |
|
26 |
<import plugin="org.txm.searchengine.core" version="1.0.0" match="greaterOrEqual"/> |
|
27 |
<import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/> |
|
28 |
<import plugin="org.txm.statsengine.core" version="1.0.0" match="greaterOrEqual"/> |
|
29 |
<import plugin="org.txm.statsengine.r.core" version="1.0.0" match="greaterOrEqual"/> |
|
30 |
<import plugin="org.txm.statsengine.r.rcp" version="1.0.0" match="greaterOrEqual"/> |
|
31 |
<import plugin="org.txm.index.core" version="1.0.0" match="greaterOrEqual"/> |
|
32 | 20 |
<import plugin="org.txm.index.rcp" version="1.0.0" match="greaterOrEqual"/> |
33 |
<import plugin="org.txm.utils"/> |
|
34 |
<import plugin="org.txm.groovy.core" version="1.0.0" match="greaterOrEqual"/> |
|
35 | 21 |
</requires> |
36 | 22 |
|
37 | 23 |
<plugin |
tmp/org.txm.cql2lsa.feature/feature.xml (revision 911) | ||
---|---|---|
17 | 17 |
</license> |
18 | 18 |
|
19 | 19 |
<requires> |
20 |
<import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/> |
|
21 | 20 |
<import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/> |
22 |
<import plugin="org.eclipse.ui"/> |
|
23 |
<import plugin="org.eclipse.core.runtime"/> |
|
24 |
<import plugin="org.eclipse.ui.editors" version="3.8.100" match="greaterOrEqual"/> |
|
25 |
<import plugin="org.eclipse.core.expressions" version="3.4.500" match="greaterOrEqual"/> |
|
26 |
<import plugin="org.txm.index.core"/> |
|
27 |
<import plugin="org.txm.lexicaltable.core"/> |
|
28 |
<import plugin="org.txm.statsengine.r.core"/> |
|
29 | 21 |
</requires> |
30 | 22 |
|
31 | 23 |
<plugin |
tmp/org.txm.tigersearch.rcp/.settings/org.eclipse.jdt.groovy.core.prefs (revision 911) | ||
---|---|---|
1 | 1 |
eclipse.preferences.version=1 |
2 |
groovy.compiler.level=23 |
|
2 |
groovy.compiler.level=-1 |
|
3 |
groovy.script.filters=**/*.dsld,y,**/*.gradle,n |
tmp/org.txm.tigersearch.rcp/META-INF/MANIFEST.MF (revision 911) | ||
---|---|---|
131 | 131 |
ls.debugger,org.mozilla.javascript.tools.idswitch,org.mozilla.javascr |
132 | 132 |
ipt.tools.jsc,org.mozilla.javascript.tools.shell,org.relaxng.datatype |
133 | 133 |
,org.relaxng.datatype.helpers,org.txm.export.ts,org.txm.function.tige |
134 |
rsearch,org.txm.importer.srcmf,org.txm.importer.tigersearch,org.txm.s
|
|
134 |
rsearch,org.txm.s |
|
135 | 135 |
earchengine.ts,org.txm.test,org.txm.tigersearch.commands,org.txm.tige |
136 | 136 |
rsearch.editors,org.w3c.css.sac,org.w3c.css.sac.helpers,org.w3c.dom.s |
137 | 137 |
mil,org.w3c.dom.svg,tigerAPI,tigerAPI.converters,tigerAPI.theories.ho |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TSImport.groovy (revision 911) | ||
---|---|---|
1 |
package org.txm.importer.tigersearch; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.util.ArrayList; |
|
5 |
|
|
6 |
import ims.tiger.index.writer.* |
|
7 |
import ims.tiger.system.* |
|
8 |
|
|
9 |
import org.txm.Toolbox; |
|
10 |
import org.txm.importer.ApplyXsl2; |
|
11 |
import org.txm.importer.xtz.* |
|
12 |
import org.txm.objects.BaseParameters |
|
13 |
import org.txm.utils.BundleUtils; |
|
14 |
import org.txm.utils.io.FileCopy; |
|
15 |
import org.apache.log4j.BasicConfigurator; |
|
16 |
|
|
17 |
class TSImport extends XTZImport { |
|
18 |
|
|
19 |
public TSImport(BaseParameters params) { |
|
20 |
super(params); |
|
21 |
} |
|
22 |
|
|
23 |
@Override |
|
24 |
public void init(BaseParameters p) { |
|
25 |
super.init(p); |
|
26 |
|
|
27 |
importer = new TSImporter(this); // only to build metadata |
|
28 |
compiler = new XTZCompiler(this) |
|
29 |
annotater = null; // no annotater step to do |
|
30 |
pager = new XTZPager(this) |
|
31 |
} |
|
32 |
|
|
33 |
/** |
|
34 |
* Do a XTZ Import then build the TIGERSearch indexes in the binary corpus "tiger" directory |
|
35 |
*/ |
|
36 |
@Override |
|
37 |
public void start() throws InterruptedException { |
|
38 |
File tigerSrcDir = sourceDirectory |
|
39 |
|
|
40 |
def xmlFiles = [] |
|
41 |
sourceDirectory.listFiles(new FileFilter() { |
|
42 |
boolean accept(File file) { |
|
43 |
if (file.isDirectory()) return false; |
|
44 |
if (file.isHidden()) return false; |
|
45 |
if (file.getName().equals("import.xml")) return false; |
|
46 |
if (!file.getName().endsWith(".xml")) return false; |
|
47 |
|
|
48 |
xmlFiles << file |
|
49 |
} |
|
50 |
}); |
|
51 |
|
|
52 |
xmlFiles.remove(new File(sourceDirectory, "import.xml")) |
|
53 |
|
|
54 |
if (xmlFiles.size() == 0) { |
|
55 |
println "Error no XML file found in $sourceDirectory" |
|
56 |
isSuccessful = false; |
|
57 |
return; |
|
58 |
} |
|
59 |
|
|
60 |
File master = xmlFiles[0]; |
|
61 |
println "Main TIGER XML file found: $master" |
|
62 |
|
|
63 |
File tsXSLFile = new File(Toolbox.getTXMHOMEPATH(), "xsl/ts.xsl"); |
|
64 |
BundleUtils.copyFiles("TIGERSearchRCP", "src", "org/txm/importer/tigersearch", "ts.xsl", tsXSLFile.getParentFile()); |
|
65 |
|
|
66 |
File xmltxmSrcDir = new File(binaryDirectory, "src"); // output directory of the TS XSL transformation |
|
67 |
xmltxmSrcDir.mkdirs(); |
|
68 |
FileCopy.copy(master, new File(xmltxmSrcDir, master.getName())); |
|
69 |
|
|
70 |
if (!ApplyXsl2.processImportSources(tsXSLFile, xmltxmSrcDir, xmltxmSrcDir)) { |
|
71 |
println "Error while applying TS XSL file to $tigerSrcDir" |
|
72 |
isSuccessful = false; |
|
73 |
return; |
|
74 |
} |
|
75 |
|
|
76 |
File[] files = xmltxmSrcDir.listFiles(); |
|
77 |
if (files == null || files.length == 0) { |
|
78 |
println "Error while applying TS XSL file to $xmltxmSrcDir is empty" |
|
79 |
isSuccessful = false; |
|
80 |
return; |
|
81 |
} |
|
82 |
|
|
83 |
sourceDirectory = xmltxmSrcDir; // hop |
|
84 |
File txmDir = new File(binaryDirectory, "txm/"+corpusName); |
|
85 |
txmDir.mkdirs(); |
|
86 |
FileCopy.copyFiles(sourceDirectory, txmDir) // the compiler step will use these files |
|
87 |
|
|
88 |
super.start(); // call the usual XTZ import |
|
89 |
|
|
90 |
if (isSuccessful) { |
|
91 |
|
|
92 |
File tigerDir = new File(binaryDirectory, "tiger"); |
|
93 |
tigerDir.mkdir(); |
|
94 |
|
|
95 |
|
|
96 |
File logprop = new File(tigerDir, "tigersearch.logprop"); |
|
97 |
|
|
98 |
logprop.withWriter("UTF-8") { writer -> |
|
99 |
writer.write("""# Default log configuration of the TIGERSearch suite |
|
100 |
log4j.rootLogger=WARN,Logfile |
|
101 |
log4j.logger.ims.tiger.gui.tigersearch.TIGERSearch=WARNING |
|
102 |
log4j.appender.Logfile=org.apache.log4j.RollingFileAppender |
|
103 |
log4j.appender.Logfile.File=\${user.home}/tigersearch/tigersearch.log |
|
104 |
log4j.appender.Logfile.MaxFileSize=500KB |
|
105 |
log4j.appender.Logfile.MaxBackupIndex=1 |
|
106 |
log4j.appender.Logfile.layout=org.apache.log4j.PatternLayout |
|
107 |
log4j.appender.Logfile.layout.ConversionPattern=%5r %-5p [%t] %c{2} - %m%n""") |
|
108 |
} |
|
109 |
|
|
110 |
BasicConfigurator.configure(); |
|
111 |
String uri = master.getAbsolutePath(); |
|
112 |
File tigerBinDir = new File(tigerDir, corpusName) |
|
113 |
tigerBinDir.mkdir() |
|
114 |
try { |
|
115 |
IndexBuilderErrorHandler handler = new SimpleErrorHandler(tigerBinDir.getAbsolutePath()); |
|
116 |
XMLIndexing indexing = new XMLIndexing(corpusName, uri, tigerBinDir.getAbsolutePath(), handler,false); |
|
117 |
indexing.startIndexing(); |
|
118 |
} |
|
119 |
catch (Exception e) { System.out.println(e.getMessage()); } |
|
120 |
} |
|
121 |
} |
|
122 |
} |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TigerXML.xsd (revision 911) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"> |
|
3 |
|
|
4 |
<!-- ================================================================== |
|
5 |
XML Schema for the TIGER-XML format |
|
6 |
http://www.ims.uni-stuttgart.de/projekte/TIGER/public/TigerXML.xsd |
|
7 |
================================================================== |
|
8 |
TIGER Project, Wolfgang Lezius |
|
9 |
IMS, University of Stuttgart, 04/01/2003 |
|
10 |
================================================================== --> |
|
11 |
|
|
12 |
|
|
13 |
<!-- ====================================================== |
|
14 |
INCLUDES DECLARATION OF THE HEADER |
|
15 |
====================================================== --> |
|
16 |
<xs:include schemaLocation="TigerXMLHeader.xsd"/> |
|
17 |
|
|
18 |
|
|
19 |
<!-- ====================================================== |
|
20 |
INCLUDES DECLARATION OF SUBCORPORA AND SENTENCES |
|
21 |
====================================================== --> |
|
22 |
<xs:include schemaLocation="TigerXMLSubcorpus.xsd"/> |
|
23 |
|
|
24 |
|
|
25 |
<!-- ====================================================== |
|
26 |
DECLARATION OF THE CORPUS DOCUMENT |
|
27 |
====================================================== --> |
|
28 |
|
|
29 |
<!-- declaration of the root element: corpus --> |
|
30 |
|
|
31 |
<xs:element name="corpus"> |
|
32 |
|
|
33 |
|
|
34 |
|
|
35 |
<xs:complexType> |
|
36 |
|
|
37 |
<xs:sequence> |
|
38 |
|
|
39 |
<xs:choice> |
|
40 |
<!-- header of the document is optional --> |
|
41 |
<xs:element name="head" type="headType" minOccurs="0" maxOccurs="1" /> |
|
42 |
</xs:choice> |
|
43 |
|
|
44 |
<xs:element name="body" type="bodyType" minOccurs="1" maxOccurs="1" /> |
|
45 |
|
|
46 |
</xs:sequence> |
|
47 |
|
|
48 |
<!-- corpus ID --> |
|
49 |
<xs:attribute name="id" type="idType" use="required" /> |
|
50 |
|
|
51 |
<!-- optional attribute: TigerXML version; used by TIGERSearch only --> |
|
52 |
<xs:attribute name="version" type="xs:string" use="optional" /> |
|
53 |
|
|
54 |
</xs:complexType> |
|
55 |
</xs:element> |
|
56 |
|
|
57 |
|
|
58 |
<!-- declaration of the body type --> |
|
59 |
|
|
60 |
<xs:complexType name="bodyType"> |
|
61 |
|
|
62 |
<xs:choice minOccurs="1" maxOccurs="unbounded"> |
|
63 |
<xs:element name="subcorpus" type="subcorpusType" minOccurs="1" maxOccurs="1"/> |
|
64 |
<xs:element name="s" type="sentenceType" minOccurs="1" maxOccurs="1"/> |
|
65 |
</xs:choice> |
|
66 |
|
|
67 |
</xs:complexType> |
|
68 |
|
|
69 |
|
|
70 |
</xs:schema> |
|
0 | 71 |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/tigersearchLoader.groovy (revision 911) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// $LastChangedDate: 2015-06-03 15:04:53 +0200 (mer., 03 juin 2015) $ |
|
23 |
// $LastChangedRevision: 2984 $ |
|
24 |
// $LastChangedBy: mdecorde $ |
|
25 |
// |
|
26 |
package org.txm.importer.tigersearch; |
|
27 |
|
|
28 |
import javax.xml.stream.XMLStreamReader; |
|
29 |
|
|
30 |
import org.txm.sw.RemoveTag; |
|
31 |
import org.txm.importer.ApplyXsl2; |
|
32 |
import org.txm.importer.ValidateXml; |
|
33 |
import org.txm.objects.*; |
|
34 |
import org.txm.tokenizer.TokenizerClasses; |
|
35 |
import org.txm.utils.*; |
|
36 |
import org.txm.*; |
|
37 |
import org.txm.scripts.teitxm.*; |
|
38 |
import org.txm.utils.i18n.*; |
|
39 |
import org.txm.metadatas.*; |
|
40 |
import javax.xml.stream.*; |
|
41 |
import org.w3c.dom.Element |
|
42 |
import org.txm.utils.xml.DomUtils; |
|
43 |
import org.txm.importer.xtz.* |
|
44 |
|
|
45 |
String userDir = System.getProperty("user.home"); |
|
46 |
|
|
47 |
def MONITOR; |
|
48 |
boolean debug = org.txm.utils.logger.Log.isPrintingErrors(); |
|
49 |
BaseParameters params; |
|
50 |
try {params = paramsBinding;MONITOR=monitor} catch (Exception) |
|
51 |
{ println "DEV MODE";//exception means we debug |
|
52 |
debug = true |
|
53 |
params = new BaseParameters(new File(userDir, "xml/roland/import.xml")) |
|
54 |
params.load() |
|
55 |
if (!org.txm.Toolbox.isInitialized()) { |
|
56 |
|
|
57 |
TokenizerClasses.loadFromNode(params.getTokenizerElement(params.getCorpusElement())); |
|
58 |
Toolbox.setParam(Toolbox.INSTALL_DIR,new File("/usr/lib/TXM")); |
|
59 |
//Toolbox.setParam(Toolbox.INSTALL_DIR,new File("C:\\Program Files\\TXM"));//For Windows |
|
60 |
Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH,new File(userDir,"treetagger")); |
|
61 |
//Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH,new File("C:\\Program Files\\treetagger"));//for Windows |
|
62 |
Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File(userDir,"treetagger/models")); |
|
63 |
Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8"); |
|
64 |
Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ","); |
|
65 |
Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\""); |
|
66 |
//Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File("C:\\Program Files\\treetagger\\models"));//for Windows |
|
67 |
Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM")); |
|
68 |
} |
|
69 |
} |
|
70 |
|
|
71 |
if (params == null) { println "no parameters. Aborting"; return; } |
|
72 |
|
|
73 |
//params.getKeyValueParameters().put(ImportKeys.CLEAN, "false") |
|
74 |
//params.getKeyValueParameters().put(ImportKeys.MULTITHREAD, "false") |
|
75 |
//params.getKeyValueParameters().put(ImportKeys.DEBUG, "false") |
|
76 |
//params.getKeyValueParameters().put(ImportKeys.UPDATECORPUS, "false") |
|
77 |
|
|
78 |
TSImport i = new TSImport(params); |
|
79 |
i.process(); |
|
80 |
readyToLoad = i.isSuccessful |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/ts.xsl (revision 911) | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
4 |
xmlns:tei="http://www.tei-c.org/ns/1.0" |
|
5 |
xmlns:xd="http://www.pnp-software.com/XSLTdoc" |
|
6 |
xmlns:edate="http://exslt.org/dates-and-times" |
|
7 |
exclude-result-prefixes="edate xd"> |
|
8 |
|
|
9 |
<xd:doc type="stylesheet"> |
|
10 |
|
|
11 |
<xd:short> |
|
12 |
Feuille de transformation du format TIGER-XML vers le format XML-TXM |
|
13 |
</xd:short> |
|
14 |
|
|
15 |
<xd:detail> |
|
16 |
This stylesheet is free software; you can redistribute it and/or |
|
17 |
modify it under the terms of the GNU Lesser General Public |
|
18 |
License as published by the Free Software Foundation; either |
|
19 |
version 3 of the License, or (at your option) any later version. |
|
20 |
|
|
21 |
This stylesheet is distributed in the hope that it will be useful, |
|
22 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
23 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
24 |
Lesser General Public License for more details. |
|
25 |
|
|
26 |
You should have received a copy of GNU Lesser Public License with |
|
27 |
this stylesheet. If not, see http://www.gnu.org/licenses/lgpl.html |
|
28 |
</xd:detail> |
|
29 |
|
|
30 |
<xd:author>Matthieu Decorde, matthieu.decorde AT ens-lyon.fr</xd:author> |
|
31 |
<xd:author>Serge Heiden, slh AT ens-lyon.fr</xd:author> |
|
32 |
<xd:author>Alexey Lavrentev, alexei.lavrentev AT ens-lyon.fr</xd:author> |
|
33 |
|
|
34 |
<xd:copyright>2016, ENS de Lyon/CNRS (UMR IHRIM Cactus)</xd:copyright> |
|
35 |
|
|
36 |
</xd:doc> |
|
37 |
|
|
38 |
<xsl:output |
|
39 |
method="xml" |
|
40 |
encoding="UTF-8" |
|
41 |
indent="yes" /> |
|
42 |
|
|
43 |
<xsl:template match="corpus"> |
|
44 |
<corpus> |
|
45 |
<xsl:choose> |
|
46 |
<xsl:when test="subcorpus"> |
|
47 |
<xsl:apply-templates select="subcorpus"/> |
|
48 |
</xsl:when> |
|
49 |
<xsl:otherwise> |
|
50 |
<text> |
|
51 |
<xsl:apply-templates select="s"/> |
|
52 |
</text> |
|
53 |
</xsl:otherwise> |
|
54 |
</xsl:choose> |
|
55 |
<xsl:apply-templates/> |
|
56 |
</corpus> |
|
57 |
</xsl:template> |
|
58 |
|
|
59 |
<xsl:template match="subcorpus"> |
|
60 |
<text> |
|
61 |
<xsl:attribute name="name"><xsl:value-of select="@name"/></xsl:attribute> |
|
62 |
<xsl:apply-templates select="s"/> |
|
63 |
</text> |
|
64 |
</xsl:template> |
|
65 |
|
|
66 |
<xsl:template match="s"> |
|
67 |
<p> |
|
68 |
<s> |
|
69 |
<xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute> |
|
70 |
<xsl:apply-templates select="graph/terminals/t" /> |
|
71 |
</s> |
|
72 |
</p> |
|
73 |
</xsl:template> |
|
74 |
|
|
75 |
<xsl:template match="t"> |
|
76 |
<w> |
|
77 |
<xsl:for-each select="@*[not(name()='word')]"> |
|
78 |
<xsl:copy/> |
|
79 |
</xsl:for-each> |
|
80 |
<xsl:value-of select="@word"/> |
|
81 |
</w> |
|
82 |
</xsl:template> |
|
83 |
|
|
84 |
</xsl:stylesheet> |
|
0 | 85 |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TSImporter.groovy (revision 911) | ||
---|---|---|
1 |
package org.txm.importer.tigersearch |
|
2 |
|
|
3 |
import org.txm.Toolbox |
|
4 |
import org.txm.importer.xtz.ImportModule; |
|
5 |
import org.txm.importer.xtz.XTZImporter |
|
6 |
import org.txm.metadatas.Metadatas |
|
7 |
import org.txm.utils.io.FileCopy |
|
8 |
|
|
9 |
/** |
|
10 |
* Only build the Metadatas object since all XML-TXM files already exists. |
|
11 |
* Metadatas is used to build text order. |
|
12 |
* |
|
13 |
* |
|
14 |
* @author mdecorde |
|
15 |
* |
|
16 |
*/ |
|
17 |
class TSImporter extends XTZImporter { |
|
18 |
|
|
19 |
public TSImporter(ImportModule module) { |
|
20 |
super(module); |
|
21 |
} |
|
22 |
|
|
23 |
@Override |
|
24 |
public void process() { |
|
25 |
File binDir = module.getBinaryDirectory(); |
|
26 |
|
|
27 |
//prepare metadata if any |
|
28 |
File allmetadatafile = new File(inputDirectory, "metadata.csv"); |
|
29 |
println allmetadatafile |
|
30 |
if (allmetadatafile.exists()) { |
|
31 |
File copy = new File(binDir, "metadata.csv") |
|
32 |
if (!FileCopy.copy(allmetadatafile, copy)) { |
|
33 |
println "Error: could not create a copy of the metadata file "+allmetadatafile.getAbsoluteFile(); |
|
34 |
return; |
|
35 |
} |
|
36 |
this.metadata = new Metadatas(copy, |
|
37 |
Toolbox.getPreference(Toolbox.METADATA_ENCODING), |
|
38 |
Toolbox.getPreference(Toolbox.METADATA_COLSEPARATOR), |
|
39 |
Toolbox.getPreference(Toolbox.METADATA_TXTSEPARATOR), 1) |
|
40 |
} |
|
41 |
isSuccessFul = true; |
|
42 |
} |
|
43 |
} |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TigerXMLSubcorpus.xsd (revision 911) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">

  <!-- ===========================================================================
       XML Schema for the subcorpus part of the TIGER-XML format
       http://www.ims.uni-stuttgart.de/projekte/TIGER/public/TigerXMLSubcorpus.xsd
       ===========================================================================
       TIGER Project, Wolfgang Lezius
       IMS, University of Stuttgart, 04/01/2003
       =========================================================================== -->

  <!-- ======================================================
       DECLARATION OF SUBCORPORA AND SENTENCES
       ====================================================== -->


  <!-- declaration of the subcorpus element -->

  <xs:element name="subcorpus" type="subcorpusType"/>


  <!-- declaration of the subcorpus type -->

  <xs:complexType name="subcorpusType">

    <!-- A subcorpus may comprise other subcorpora or sentences -->

    <xs:choice minOccurs="0" maxOccurs="unbounded">
      <xs:element name="subcorpus" type="subcorpusType" minOccurs="1" maxOccurs="1"/>
      <xs:element name="s" type="sentenceType" minOccurs="1" maxOccurs="1"/>
    </xs:choice>

    <!-- required: subcorpus name -->

    <xs:attribute name="name" type="xs:string" use="required"/>

    <!-- optional: reference to external subcorpus file

         A subcorpus of a TigerXML corpus can also be stored in a separate file.
         This attribute points to the external subcorpus file. The pointer is
         a URI. Examples: file:relative.xml or file:/path/to/absolute.xml

         Note: If there is a pointer to an external file, the subcorpus
               element must be empty. -->

    <xs:attribute name="external" type="xs:anyURI"/>

  </xs:complexType>


  <!-- declaration of the sentence type -->

  <xs:complexType name="sentenceType">

    <xs:sequence>
      <xs:element name="graph" type="graphType" minOccurs="0" maxOccurs="1"/>
      <xs:element name="matches" type="matchesType" minOccurs="0" maxOccurs="1"/>
    </xs:sequence>

    <xs:attribute name="id" type="idType" use="required"/>

  </xs:complexType>


  <!-- declaration of the graph type -->

  <xs:complexType name="graphType">

    <xs:sequence>
      <xs:element name="terminals" type="terminalsType" minOccurs="1" maxOccurs="1"/>
      <xs:element name="nonterminals" type="nonterminalsType" minOccurs="1" maxOccurs="1"/>
    </xs:sequence>

    <xs:attribute name="root" type="idrefType" use="required"/>

    <!-- indicates that the exported sentence is discontinuous -->
    <xs:attribute name="discontinuous" type="xs:boolean" default="false" use="optional"/>

  </xs:complexType>


  <!-- declaration of the terminals type -->

  <xs:complexType name="terminalsType">

    <xs:sequence>
      <xs:element name="t" type="tType" minOccurs="1" maxOccurs="unbounded"/>
    </xs:sequence>

  </xs:complexType>


  <!-- declaration of the t element -->

  <xs:complexType name="tType">

    <!-- secondary edges possible -->
    <xs:sequence>
      <xs:element name="secedge" type="secedgeType" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>

    <xs:attribute name="id" type="idType" use="required"/>
    <xs:attributeGroup ref="tfeatureAttributes"/>

  </xs:complexType>


  <!-- declaration of the nonterminals type -->

  <xs:complexType name="nonterminalsType">

    <xs:sequence>
      <xs:element name="nt" type="ntType" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>

  </xs:complexType>


  <!-- declaration of the nt element -->

  <xs:complexType name="ntType">

    <!-- edge and secondary edges possible -->
    <xs:sequence>
      <xs:element name="edge" type="edgeType" minOccurs="0" maxOccurs="unbounded"/>
      <xs:element name="secedge" type="secedgeType" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>

    <xs:attribute name="id" type="idType" use="required"/>
    <xs:attributeGroup ref="ntfeatureAttributes"/>

  </xs:complexType>


  <!-- declaration of the edge type -->

  <xs:complexType name="edgeType">

    <xs:attribute name="idref" type="idrefType" use="required"/>

    <xs:attributeGroup ref="edgelabelAttribute"/>

  </xs:complexType>


  <!-- declaration of the secondary edge type -->

  <xs:complexType name="secedgeType">

    <xs:attribute name="idref" type="idrefType" use="required"/>

    <xs:attributeGroup ref="secedgelabelAttribute"/>

  </xs:complexType>


  <!-- declaration of the matches type -->

  <xs:complexType name="matchesType">

    <xs:sequence>
      <xs:element name="match" type="matchType" minOccurs="1" maxOccurs="unbounded"/>
    </xs:sequence>

  </xs:complexType>


  <!-- declaration of the match type -->

  <xs:complexType name="matchType">

    <xs:sequence>
      <xs:element name="variable" type="varType" minOccurs="1" maxOccurs="unbounded"/>
    </xs:sequence>

    <xs:attribute name="subgraph" type="idrefType" use="required"/>

  </xs:complexType>


  <!-- declaration of the variable type -->

  <xs:complexType name="varType">

    <xs:attribute name="name" type="xs:string" use="required"/>

    <xs:attribute name="idref" type="idrefType" use="required"/>

  </xs:complexType>


  <!-- ======================================================
       SENTENCE DECLARATIONS THAT SHOULD BE REFINED
       ====================================================== -->

  <!-- declaration of the TERMINAL FEATURE ATTRIBUTES;
       this group is unrestricted, but should be refined by a
       specialised, corpus-dependent schema -->

  <xs:attributeGroup name="tfeatureAttributes">

    <xs:anyAttribute processContents="skip"/>

  </xs:attributeGroup>


  <!-- declaration of the NONTERMINAL FEATURE ATTRIBUTES;
       this group is unrestricted, but should be refined by a
       specialised, corpus-dependent schema -->

  <xs:attributeGroup name="ntfeatureAttributes">

    <xs:anyAttribute processContents="skip"/>

  </xs:attributeGroup>


  <!-- declaration of the EDGE-LABEL ATTRIBUTE;
       the label attribute is optional, which should be refined by a
       specialised, corpus-dependent schema -->

  <xs:attributeGroup name="edgelabelAttribute">

    <xs:attribute name="label" type="xs:string" use="optional"/>

  </xs:attributeGroup>


  <!-- declaration of the SECONDARY-EDGE-LABEL ATTRIBUTE;
       the label attribute is optional, which should be refined by a
       specialised, corpus-dependent schema -->

  <xs:attributeGroup name="secedgelabelAttribute">

    <xs:attribute name="label" type="xs:string" use="optional"/>

  </xs:attributeGroup>


  <!-- ======================================================
       ID and IDREF TYPE DECLARATIONS
       ====================================================== -->

  <!-- Even though XML Schema is a W3C Recommendation, schema
       support of XML parsers is still restricted. Using some
       parsers you might have problems with the ID and IDREF
       attributes in combination with an "anyAttribute"
       declaration. In this case, just modify the base type
       of the following two declarations to "xs:string". -->


  <!-- declaration of idType -->

  <xs:simpleType name="idType">

    <xs:restriction base="xs:ID"/>

  </xs:simpleType>


  <!-- declaration of idrefType -->

  <xs:simpleType name="idrefType">

    <xs:restriction base="xs:IDREF"/>

  </xs:simpleType>


</xs:schema>
|
0 | 271 |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/package.html (revision 911) | ||
---|---|---|
1 |
<html> |
|
2 |
<body> |
|
3 |
<p>TIGERSearch import module. This is a prototype that can only manage SRCMF TIGERSearch sources</p> |
|
4 |
</body> |
|
5 |
</html> |
|
0 | 6 |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TigerXMLHeader.xsd (revision 911) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">

  <!-- =======================================================================
       XML SubSchema for the header part of the TIGER-XML format
       http://www.ims.uni-stuttgart.de/projekte/TIGER/public/TigerXMLHeader.xsd
       =======================================================================
       TIGER Project, Wolfgang Lezius
       IMS, University of Stuttgart, 04/01/2003
       ======================================================================= -->


  <!-- ======================================================
       DECLARATION OF THE HEADER
       ====================================================== -->


  <!-- declaration of the head element -->

  <xs:element name="head" type="headType"/>


  <!-- declaration of the header type -->

  <xs:complexType name="headType">

    <xs:sequence>
      <xs:element name="meta" type="metaType" minOccurs="0" maxOccurs="1"/>
      <xs:element name="annotation" type="annotationType" minOccurs="0" maxOccurs="1"/>
    </xs:sequence>

    <!-- optional: reference to external header file

         The header of a TigerXML corpus can also be stored in a separate file.
         This attribute points to the external header file. The pointer is
         a URI. Examples: file:relative.xml or file:/path/to/absolute.xml

         Note: If there is a pointer to an external file, the head
               element must be empty. -->

    <xs:attribute name="external" type="xs:anyURI"/>

  </xs:complexType>


  <!-- declaration of the meta information type -->

  <xs:complexType name="metaType">

    <xs:sequence>
      <xs:element name="name" type="xs:string" minOccurs="0" maxOccurs="1"/>
      <xs:element name="author" type="xs:string" minOccurs="0" maxOccurs="1"/>
      <xs:element name="date" type="xs:string" minOccurs="0" maxOccurs="1"/>
      <xs:element name="description" type="xs:string" minOccurs="0" maxOccurs="1"/>
      <xs:element name="format" type="xs:string" minOccurs="0" maxOccurs="1"/>
      <xs:element name="history" type="xs:string" minOccurs="0" maxOccurs="1"/>
    </xs:sequence>

  </xs:complexType>


  <!-- declaration of the annotation type -->

  <xs:complexType name="annotationType">

    <xs:sequence>
      <xs:element name="feature" type="featureType" minOccurs="1" maxOccurs="unbounded"/>
      <xs:element name="edgelabel" type="edgelabelType" minOccurs="0" maxOccurs="1"/>
      <xs:element name="secedgelabel" type="edgelabelType" minOccurs="0" maxOccurs="1"/>
    </xs:sequence>

  </xs:complexType>


  <!-- declaration of the feature type -->

  <xs:complexType name="featureType">

    <xs:sequence>
      <xs:element name="value" type="featurevalueType" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>

    <xs:attribute name="name" type="featurenameType" use="required"/>

    <xs:attribute name="domain" use="required">
      <xs:simpleType>
        <xs:restriction base="xs:string">
          <xs:enumeration value="T"/>     <!-- feature for terminal nodes -->
          <xs:enumeration value="NT"/>    <!-- feature for nonterminal nodes -->
          <xs:enumeration value="FREC"/>  <!-- feature for both -->
        </xs:restriction>
      </xs:simpleType>
    </xs:attribute>

  </xs:complexType>


  <!-- declaration of the (secondary) edge label type -->

  <xs:complexType name="edgelabelType">

    <xs:sequence>
      <xs:element name="value" type="featurevalueType" minOccurs="0" maxOccurs="unbounded"/>
    </xs:sequence>

  </xs:complexType>


  <!-- declaration of the feature value type -->

  <xs:complexType name="featurevalueType">

    <xs:simpleContent>  <!-- element content: documentation of the feature value -->
      <xs:extension base="xs:string">
        <xs:attribute name="name" type="xs:string"/>
      </xs:extension>
    </xs:simpleContent>

  </xs:complexType>


  <!-- ======================================================
       HEADER DECLARATIONS THAT SHOULD BE REFINED
       ====================================================== -->

  <!-- declaration of the FEATURE NAMES used in the corpus header;
       this type is unrestricted, but should be refined by a
       specialised, corpus-dependent schema -->

  <xs:simpleType name="featurenameType">

    <xs:restriction base="xs:string">
      <xs:minLength value="1"/>
      <xs:maxLength value="20"/>
      <xs:whiteSpace value="preserve"/>
    </xs:restriction>

  </xs:simpleType>


</xs:schema>
|
0 | 143 |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/package.html (revision 911) | ||
---|---|---|
1 |
<html> |
|
2 |
<body> |
|
3 |
<p>TIGERSearch import module. This is a prototype that can only manage SRCMF TIGERSearch sources</p> |
|
4 |
</body> |
|
5 |
</html> |
|
0 | 6 |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/MasterReheader.groovy (revision 911) | ||
---|---|---|
1 |
#! /usr/bin/groovy |
|
2 |
package org.txm.importer.srcmf; |
|
3 |
|
|
4 |
/* |
|
5 |
* To change this template, choose Tools | Templates |
|
6 |
* and open the template in the editor. |
|
7 |
*/ |
|
8 |
|
|
9 |
// Set up globals |
|
10 |
// def masterFile = new File('/home/tomr/Documents/Work/lyon12/srcmf/groovy/SrcmfImport/aucassin_surface/master_pos.xml') |
|
11 |
// def outputFile = new File('/home/tomr/Documents/Work/lyon12/srcmf/groovy/SrcmfImport/aucassin_surface/master_pos2.xml') |
|
12 |
// def headerFile = new File('/home/tomr/Documents/Work/SRCMF/srcmf_ts/header_srcmf.xml') |
|
13 |
// def feats = [nt:['cat', 'type', 'coord'], t:['pos', 'form', 'q']] |
|
14 |
// def firstFeat = [t:'word', nt:'cat'] |
|
15 |
|
|
16 |
// Command-line entry point:
//   MasterReheader.groovy [options] master_file.xml header_file.xml
// Parses the options, resolves the defaults and delegates to script().
def cli = new CliBuilder(
	usage:'MasterReheader.groovy [options] master_file.xml header_file.xml'
)
cli.h(longOpt:'help', 'Prints this message.')
cli.o(longOpt:'output', args:1, argName:'outputfile.xml', 'Output to given file.')
cli.nt(longOpt:'ntfeats', args:1, argName:'ntfeats', 'NT features for which to provide value node.')
cli.t(longOpt:'tfeats', args:1, argName:'tfeats', 'T features for which to provide value node.')
cli.nt1(longOpt:'ntfeat1st', args:1, argName:'1st ntfeat', 'First NT feature listed in header.')
cli.t1(longOpt:'tfeat1st', args:1, argName:'1st tfeat', 'First T feature listed in header.')
options = cli.parse(args)
// cli.parse() yields null when the command line cannot be parsed
if (options != null && options.arguments().size() == 2) {
	def masterFile = new File(options.arguments()[0])
	def headerFile = new File(options.arguments()[1])
	def masterFolder = masterFile.getCanonicalFile().getParent()
	// Default output file sits next to the master file
	def outputFile = options.o ? new File(options.o) : new File(masterFolder, 'MasterReheader_out.xml')
	// An option that was not given reads as false: fall back to the documented defaults
	def ntfirst = options.nt1 ?: 'cat'
	def tfirst = options.t1 ?: 'word'
	script(
		masterFile, headerFile, outputFile,
		// 'nts' / 'ts' collect every value given for -nt / -t; use an empty list when absent
		['nt':(options.nts ?: []), 't':(options.ts ?: [])],
		['nt':ntfirst, 't':tfirst]
	)
} else {
	println 'Incorrect number of command line arguments... exiting'
	// usage() prints by itself and returns nothing, so it must not be wrapped in println
	cli.usage()
}
|
54 |
|
|
55 |
/**
 * Rewrites the feature declarations of a TIGER master file from the attributes
 * actually found on the t and nt nodes of its external subcorpus files.
 *
 * @param masterFile master file whose head/annotation/feature nodes are replaced
 * @param headerFile header file providing the description text of feature values
 * @param outputFile file receiving the serialized, re-headed master document
 * @param feats per type ('t', 'nt'), the features for which value nodes are written;
 *        a missing or false entry means "no value nodes"
 * @param firstFeat per type ('t', 'nt'), the feature to list first; an entry naming an
 *        unknown feature is replaced IN THIS MAP by the first feature in sorted order
 */
def script(
	File masterFile, File headerFile, File outputFile, HashMap feats, HashMap firstFeat
) {
	// Load master and header files
	def master = new XmlParser().parse(masterFile)
	def header = new XmlParser().parse(headerFile)
	def masterFolder = masterFile.getCanonicalFile().getParent()
	// Feature declarations found anywhere in the header, looked up once
	def headerFeatures = header.'**'.feature

	// Per node type: attribute name -> set of the values seen in the subcorpora
	def attrVal = [nt:[:], t:[:]]

	// Records every attribute value of a t or nt node
	def getvals = { node, type ->
		node.attributes().each { mEntry ->
			if (! attrVal[type].containsKey(mEntry.getKey())) {
				attrVal[type][mEntry.getKey()] = new HashSet()
			}
			attrVal[type][mEntry.getKey()].add(mEntry.getValue())
		}
	}

	// Scan subcorpus files and build attribute lists.
	master.body.subcorpus.each {
		// [5..-1] drops the first five characters of @external,
		// presumably the "file:" scheme prefix - TODO confirm
		def subcorpusFile = new File (masterFolder, it.'@external'[5..-1])
		def subcorpus = new XmlParser().parse(subcorpusFile)
		subcorpus.s.graph.terminals.t.each { getvals.call(it, 't') }
		subcorpus.s.graph.nonterminals.nt.each { getvals.call(it, 'nt') }
	}
	// Id isn't an attribute in the header.
	attrVal['t'].remove('id')
	attrVal['nt'].remove('id')

	// Remove old feature nodes in master file
	master.head.annotation.feature.each { oldNode ->
		oldNode.parent().remove(oldNode)
	}
	assert (! master.head.annotation.feature)

	// Check firstFeat was relevant
	['t', 'nt'].each { type ->
		if (! (attrVal[type].keySet().contains(firstFeat[type]))) {
			firstFeat[type] = attrVal[type].keySet().sort()[0]
		}
	}
	assert attrVal['t'].keySet().contains(firstFeat['t'])
	assert attrVal['nt'].keySet().contains(firstFeat['nt'])

	// Ordered feature list per type: the first feature, then all the others
	def featList = [:]
	['t', 'nt'].each { type ->
		featList[type] = [firstFeat[type]]
		featList[type].addAll(attrVal[type].keySet().findAll { it != firstFeat[type] })
	}

	// Add new feature and value nodes
	['t', 'nt'].each { type ->
		featList[type].each { feat ->
			def fNode = new Node(master.head.annotation[0], 'feature',
				['domain':type.toUpperCase(), 'name':feat]
			)
			// Add value nodes if the feature is listed in 'feats'
			// (a false/null entry, e.g. an option that was not given, lists nothing)
			if (feats[type] && feats[type].contains(feat)) {
				assert headerFeatures
				assert headerFeatures[0].'@name'
				assert headerFeatures[0].'@domain'
				// 'FREC' is a valid domain too: the lookup below accepts it
				assert ['NT', 'T', 'FREC'].contains(headerFeatures[0].'@domain')
				// Header declaration of this feature for this node type, if any
				def hFNode = headerFeatures.find {
					it.'@name' == feat && (
						it.'@domain' == type.toUpperCase() || it.'@domain' == 'FREC'
					)
				}
				attrVal[type][feat].each { value ->
					def hVNode = hFNode ? hFNode.value.find { it.'@name' == value } : null
					// Values missing from the header get a placeholder description
					def vText = hVNode ? hVNode.text() : '[unknown]'
					new Node(fNode, 'value', ['name':value], vText)
				}
			}
		}
	}

	// Save to output_file
	outputFile.withWriter { writer ->
		writer << groovy.xml.XmlUtil.serialize(master)
	}
}
|
138 |
|
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/SrcmfImporter.groovy (revision 911) | ||
---|---|---|
1 |
/* |
|
2 |
* Calls all import scripts |
|
3 |
*/ |
|
4 |
|
|
5 |
/** |
|
6 |
* |
|
7 |
* @author tmr |
|
8 |
*/ |
|
9 |
|
|
10 |
// Command line form of import statements: |
|
11 |
// import MasterReheader |
|
12 |
// import PunctInjectImport |
|
13 |
// import SubcorpusDataInject |
|
14 |
// import TigerSubcorpus2Main |
|
15 |
// import java.util.logging.FileHandler |
|
16 |
// import javax.xml.parsers.DocumentBuilderFactory |
|
17 |
|
|
18 |
// TXM package statement |
|
19 |
package org.txm.importer.srcmf |
|
20 |
|
|
21 |
import java.util.logging.*; |
|
22 |
|
|
23 |
|
|
24 |
// Command line entry point:
//   SrcmfImport.groovy [options] tiger_master.xml xml_txm.xml header_file.xml
def cli = new CliBuilder(
	usage:'SrcmfImport.groovy [options] tiger_master.xml xml_txm.xml header_file.xml'
)
cli.h(longOpt:'help', 'Prints this message.')
options = cli.parse(args)
// cli.parse() yields null when the command line cannot be parsed
if (options == null || options.arguments().size() != 3) {
	println 'Incorrect number of command line arguments... exiting'
	// usage() prints by itself and returns nothing, so it must not be wrapped in println
	cli.usage()
	System.exit(2)
}

def tigerFile = new File(options.arguments()[0])
def txmFile = new File(options.arguments()[1])
def headerFile = new File(options.arguments()[2])
// The grandparent directory of the XML-TXM file is passed as the binary directory
def tigerXmlAll = doAllButPnc(
	tigerFile,
	txmFile,
	headerFile,
	txmFile.getAbsoluteFile().getParentFile().getParentFile()
)
doPnc(tigerXmlAll, txmFile)
// The merged file is deleted once doPnc() has run
tigerXmlAll.delete()
|
47 |
|
|
48 |
/**
 * Runs every import step except the punctuation injection: data injection into
 * the master file, header rebuild, then merge of the master and its subcorpora.
 *
 * @param tigerFile the TIGER master file
 * @param txmFile the XML-TXM file; only its parent directory is used here
 * @param headerFile the header file handed to the reheader script
 * @param binDir directory in which the "tiger" working directory and "tiger.log" are created
 * @return the merged file "TigerAll.xml" located in the "tiger" working directory
 */
def doAllButPnc(File tigerFile, File txmFile, File headerFile, File binDir) {
	// Run pos injection script
	File txmSrcDir = txmFile.getAbsoluteFile().getParentFile()
	File tigerDir = new File(binDir, "tiger")
	tigerDir.mkdir()
	File masterpos = new File(tigerDir, "master_pos.xml")
	// NOTE(review): the first file listed in the directory is used, not txmFile itself;
	// listFiles() guarantees no order - presumably the directory holds a single file, confirm
	File xmltxm = txmSrcDir.listFiles()[0]
	File logFile = new File(binDir, "tiger.log")
	def sdi = new SubcorpusDataInject(
		xmltxm,
		new FileHandler(logFile.getAbsolutePath()), "vers"
	)
	sdi.processMaster(tigerFile, masterpos)

	// Run reheader script
	def reheader = new MasterReheader()
	File tmp = File.createTempFile("tmp", ".xml",tigerDir)
	def feats = ['nt':['cat', 'type', 'coord'], 't':['pos', 'form', 'q']]
	def firstFeat = ['nt':'cat', 't':'word']
	reheader.script(masterpos, headerFile, tmp, feats, firstFeat)
	// createTempFile() already created the file, so an empty file also means nothing was written
	if (!tmp.exists() || tmp.length() == 0) {
		println "Error: reheader failed"
	}
	// Replace the master file by the re-headed one
	masterpos.delete()
	if (!tmp.renameTo(masterpos)) {
		println "Error: could not rename "+tmp.getAbsolutePath()+" to "+masterpos.getAbsolutePath()
	}

	// Run merge master & subcorpus script
	def tigerXmlAll = new File(masterpos.getParentFile(), "TigerAll.xml")
	def mergescript = new TigerSubcorpus2Main()
	mergescript.script(masterpos, tigerXmlAll)
	return tigerXmlAll
}
|
78 |
|
|
79 |
/**
 * Runs the punctuation injection step.
 *
 * @param tigerXmlAll the merged TIGER file; the result "TigerPnc.xml" is written next to it
 * @param txmFile the XML-TXM file handed to the injector
 */
def doPnc(File tigerXmlAll, File txmFile) {
	// declared locally so the injector does not end up as a script-binding global
	def injector = new PunctInjectImport(tigerXmlAll, txmFile)
	injector.outputFile = new File(tigerXmlAll.getParentFile(), "TigerPnc.xml")
	injector.process()
}
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/SubcorpusDataInject.groovy (revision 911) | ||
---|---|---|
1 |
#! /usr/bin/groovy |
|
2 |
package org.txm.importer.srcmf; |
|
3 |
/* |
Also available in: Unified diff