Revision 911

tmp/org.txm.wordcloud.feature/feature.xml (revision 911)
67 67
   </license>
68 68

  
69 69
   <requires>
70
      <import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/>
71

  
72
      <import plugin="org.txm.utils" version="1.0.0" match="greaterOrEqual"/>
73
      <import plugin="org.eclipse.core.runtime" version="3.10.0" match="greaterOrEqual"/>
74
      <import plugin="org.txm.chartsengine.core" version="1.0.0" match="greaterOrEqual"/>
75
      <import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/>
76
      <import plugin="org.txm.statsengine.r.core" version="1.0.0" match="greaterOrEqual"/>
77 70
      <import plugin="org.txm.chartsengine.jfreechart.core" version="1.0.0" match="greaterOrEqual"/>
78
      <import plugin="org.txm.statsengine.core" version="1.0.0" match="greaterOrEqual"/>
79 71
      <import plugin="org.txm.chartsengine.r.core" version="1.0.0" match="greaterOrEqual"/>
80 72
      <import plugin="org.txm.index.core" version="1.0.0" match="greaterOrEqual"/>
81
      <import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/>
82
      <import plugin="org.eclipse.osgi" version="3.10.2" match="greaterOrEqual"/>
83 73
      <import plugin="org.txm.index.rcp" version="1.0.0" match="greaterOrEqual"/>
84
      <import plugin="org.eclipse.ui" version="3.106.1" match="greaterOrEqual"/>
85 74
      <import plugin="org.txm.wordcloud.core" version="1.0.0" match="greaterOrEqual"/>
86 75
      <import plugin="org.txm.chartsengine.rcp"/>
87 76
   </requires>
tmp/org.txm.backtomedia.feature/feature.xml (revision 911)
49 49
   </license>
50 50

  
51 51
   <requires>
52
      <import plugin="org.eclipse.core.runtime"/>
53
      <import plugin="org.eclipse.ui"/>
54
      <import plugin="org.eclipse.core.expressions" version="3.4.500" match="greaterOrEqual"/>
55
      <import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/>
56
      <import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/>
57
      <import plugin="org.txm.concordance.core" version="1.0.0" match="greaterOrEqual"/>
58 52
      <import plugin="org.txm.concordance.rcp" version="1.0.0" match="greaterOrEqual"/>
59 53
   </requires>
60 54

  
tmp/org.txm.wordcloud.rcp/META-INF/MANIFEST.MF (revision 911)
1 1
Manifest-Version: 1.0
2
Require-Bundle: org.txm.rcp;bundle-version="0.7.8";visibility:=reexpor
3
 t,org.txm.utils;bundle-version="1.0.0";visibility:=reexport,org.eclip
4
 se.osgi;bundle-version="3.10.2";visibility:=reexport,org.txm.index.rc
5
 p;bundle-version="1.0.0";visibility:=reexport,org.eclipse.core.runtim
6
 e;bundle-version="3.10.0";visibility:=reexport,org.txm.chartsengine.c
7
 ore;bundle-version="1.0.0";visibility:=reexport,org.txm.index.core;bu
8
 ndle-version="1.0.0";visibility:=reexport,org.txm.searchengine.cqp.co
9
 re;bundle-version="1.1.0";visibility:=reexport,org.eclipse.ui;bundle-
10
 version="3.106.1";visibility:=reexport,org.txm.wordcloud.core;bundle-
11
 version="1.0.0";visibility:=reexport,org.txm.chartsengine.rcp;visibil
12
 ity:=reexport,org.txm.core;bundle-version="0.7.0";visibility:=reexpor
13
 t
2
Require-Bundle: org.txm.index.rcp;bundle-version="1.0.0";visibility:=reexport,
3
 org.txm.wordcloud.core;bundle-version="1.0.0";visibility:=reexport,
4
 org.txm.chartsengine.rcp;visibility:=reexport
14 5
Bundle-Vendor: Textometrie.org
15 6
Bundle-ActivationPolicy: lazy
16 7
Bundle-Version: 1.0.0.qualifier
tmp/org.txm.textsbalance.rcp/META-INF/MANIFEST.MF (revision 911)
1 1
Manifest-Version: 1.0
2
Require-Bundle: org.txm.searchengine.cqp.core;bundle-version="1.1.0";v
3
 isibility:=reexport,org.txm.searchengine.core;bundle-version="1.0.0";
4
 visibility:=reexport,org.txm.rcp;bundle-version="0.7.7";visibility:=r
5
 eexport,org.eclipse.core.runtime;bundle-version="3.10.0";visibility:=
6
 reexport,org.eclipse.ui;bundle-version="3.106.1";visibility:=reexport
7
 ,org.eclipse.jface.text;visibility:=reexport,org.eclipse.ui.editors;v
8
 isibility:=reexport,org.txm.core;bundle-version="0.7.0";visibility:=r
9
 eexport,org.txm.chartsengine.r.core;visibility:=reexport,org.eclipse.
10
 core.expressions;bundle-version="3.4.600";visibility:=reexport,org.tx
11
 m.textsbalance.core;bundle-version="1.0.0";visibility:=reexport,org.t
12
 xm.chartsengine.jfreechart.core;bundle-version="1.0.0";visibility:=re
13
 export,org.txm.chartsengine.rcp;bundle-version="1.0.0";visibility:=re
14
 export
2
Require-Bundle: org.txm.textsbalance.core;bundle-version="1.0.0";visibility:=reexport,
3
 org.txm.chartsengine.rcp;bundle-version="1.0.0";visibility:=reexport
15 4
Export-Package: org.txm.textsbalance.rcp.adapters,org.txm.textsbalance
16 5
 .rcp.editors,org.txm.textsbalance.rcp.handlers,org.txm.textsbalance.r
17 6
 cp.preferences
tmp/org.txm.analec.rcp/src/org/txm/analec/imports/AnalecAnnotationsImporter.java (revision 911)
18 18
import org.apache.commons.lang.StringUtils;
19 19
import org.eclipse.core.runtime.IProgressMonitor;
20 20
import org.txm.Toolbox;
21
import org.txm.importer.graal.PersonalNamespaceContext;
21
import org.txm.importer.PersonalNamespaceContext;
22 22
import org.txm.searchengine.cqp.AbstractCqiClient;
23 23
import org.txm.searchengine.cqp.CQPSearchEngine;
24 24
import org.txm.searchengine.cqp.corpus.MainCorpus;
tmp/org.txm.analec.rcp/src/org/txm/analec/imports/DOMAnalecAnnotationsImporter.java (revision 911)
8 8

  
9 9
import org.eclipse.core.runtime.IProgressMonitor;
10 10
import org.txm.Toolbox;
11
import org.txm.importer.graal.PersonalNamespaceContext;
11
import org.txm.importer.PersonalNamespaceContext;
12 12
import org.txm.searchengine.cqp.AbstractCqiClient;
13 13
import org.txm.searchengine.cqp.CQPSearchEngine;
14 14
import org.txm.searchengine.cqp.corpus.MainCorpus;
tmp/org.txm.analec.rcp/src/org/txm/analec/export/AnalecAnnotationTEIExporter.java (revision 911)
17 17
import org.txm.Toolbox;
18 18
import org.txm.importer.StaxIdentityParser;
19 19
import org.txm.importer.StaxStackWriter;
20
import org.txm.importer.graal.PersonalNamespaceContext;
20
import org.txm.importer.PersonalNamespaceContext;
21 21
import org.txm.objects.BaseParameters;
22 22
import org.txm.rcp.Application;
23 23
import org.txm.rcp.TxmPreferences;
tmp/org.txm.dictionary.feature/feature.xml (revision 911)
17 17
   </license>
18 18

  
19 19
   <requires>
20
      <import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/>
21
      <import plugin="org.txm.libs.groovy-all" version="2.3.3" match="greaterOrEqual"/>
22
      <import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/>
23
      <import plugin="org.eclipse.ui"/>
24
      <import plugin="org.eclipse.core.runtime"/>
25
      <import plugin="org.txm.utils"/>
26
      <import plugin="org.eclipse.persistence.jpa" version="2.6.0" match="greaterOrEqual"/>
27
      <import plugin="org.txm.index.core" version="1.0.0" match="greaterOrEqual"/>
28 20
      <import plugin="org.txm.index.rcp" version="1.0.0" match="greaterOrEqual"/>
29
      <import plugin="javax.persistence"/>
30 21
      <import plugin="org.txm.annotation.kr.core" version="1.0.0" match="greaterOrEqual"/>
31 22
   </requires>
32 23

  
tmp/org.txm.para.rcp/.classpath (revision 911)
1 1
<?xml version="1.0" encoding="UTF-8"?>
2 2
<classpath>
3 3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins">
5
		<accessrules>
6
			<accessrule kind="accessible" pattern="**"/>
7
		</accessrules>
8
	</classpathentry>
5 9
	<classpathentry kind="src" path="src"/>
6 10
	<classpathentry kind="output" path="bin"/>
7 11
</classpath>
tmp/org.txm.para.rcp/META-INF/MANIFEST.MF (revision 911)
1 1
Manifest-Version: 1.0
2
Require-Bundle: org.txm.utils;bundle-version="1.0.0";visibility:=reexp
3
 ort,org.eclipse.core.runtime;bundle-version="3.10.0";visibility:=reex
4
 port,org.eclipse.osgi;bundle-version="3.10.2";visibility:=reexport,or
5
 g.eclipse.ui;visibility:=reexport,org.txm.concordance.rcp;visibility:
6
 =reexport,org.txm.searchengine.cqp.core;bundle-version="1.1.0";visibi
7
 lity:=reexport,org.txm.rcp;bundle-version="0.7.8";visibility:=reexpor
8
 t,org.txm.core;bundle-version="0.7.0";visibility:=reexport,org.txm.pa
9
 ra.core;visibility:=reexport,org.txm.searchengine.core;bundle-version
10
 ="1.0.0";visibility:=reexport
2
Require-Bundle: org.txm.concordance.rcp;visibility:=reexport,
3
 org.txm.para.core;visibility:=reexport
11 4
Export-Package: org.txm.para.rcp.editors,
12 5
 org.txm.para.rcp.handlers,
13 6
 org.txm.para.rcp.messages
tmp/org.txm.textsbalance.feature/feature.xml (revision 911)
65 65
   </license>
66 66

  
67 67
   <requires>
68
      <import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/>
69
      <import plugin="org.txm.searchengine.core" version="1.0.0" match="greaterOrEqual"/>
70
      <import plugin="org.txm.rcp" version="0.7.7" match="greaterOrEqual"/>
71
      <import plugin="org.eclipse.core.runtime" version="3.10.0" match="greaterOrEqual"/>
72
      <import plugin="org.eclipse.ui" version="3.106.1" match="greaterOrEqual"/>
73
      <import plugin="org.eclipse.jface.text"/>
74
      <import plugin="org.eclipse.ui.editors"/>
75
      <import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/>
76
      <import plugin="org.txm.chartsengine.r.core"/>
77
      <import plugin="org.eclipse.core.expressions" version="3.4.600" match="greaterOrEqual"/>
78 68
      <import plugin="org.txm.textsbalance.core" version="1.0.0" match="greaterOrEqual"/>
79
      <import plugin="org.txm.chartsengine.jfreechart.core" version="1.0.0" match="greaterOrEqual"/>
80 69
      <import plugin="org.txm.chartsengine.rcp" version="1.0.0" match="greaterOrEqual"/>
70
      <import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/>
71
      <import plugin="org.txm.chartsengine.jfreechart.core" version="1.0.0" match="greaterOrEqual"/>
72
      <import plugin="org.txm.chartsengine.r.core" version="1.0.0" match="greaterOrEqual"/>
81 73
   </requires>
82 74

  
83 75
   <plugin
tmp/org.txm.oriflamms.rcp/META-INF/MANIFEST.MF (revision 911)
1 1
Manifest-Version: 1.0
2
Require-Bundle: org.txm.core;bundle-version="0.7.0";visibility:=reexpo
3
 rt,org.txm.rcp;visibility:=reexport,org.eclipse.ui;visibility:=reexpo
4
 rt,org.eclipse.core.runtime;visibility:=reexport,org.txm.searchengine
5
 .cqp.core;visibility:=reexport,org.txm.utils;visibility:=reexport
2
Require-Bundle: org.txm.rcp;visibility:=reexport,
3
 org.txm.searchengine.cqp.rcp;bundle-version="1.0.0"
6 4
Bundle-Vendor: Textometrie.org
7 5
Bundle-ActivationPolicy: lazy
8 6
Bundle-Version: 1.0.0.qualifier
tmp/org.txm.analec.feature/feature.xml (revision 911)
67 67
   </license>
68 68

  
69 69
   <requires>
70
      <import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/>
71
      <import plugin="org.txm.searchengine.core" version="1.0.0" match="greaterOrEqual"/>
72 70
      <import plugin="org.txm.concordance.rcp" version="1.0.0" match="greaterOrEqual"/>
73
      <import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/>
74
      <import plugin="org.eclipse.ui"/>
75
      <import plugin="org.eclipse.core.runtime"/>
76
      <import plugin="org.txm.rcp" version="0.7.5" match="greaterOrEqual"/>
77
      <import plugin="org.eclipse.ui.browser" version="3.4.100" match="greaterOrEqual"/>
78
      <import plugin="org.eclipse.jface.databinding" version="1.6.200" match="greaterOrEqual"/>
79
      <import plugin="org.eclipse.jface.text" version="3.9.2" match="greaterOrEqual"/>
80
      <import plugin="org.txm.links.rcp" version="1.0.0" match="greaterOrEqual"/>
81
      <import plugin="org.eclipse.jface"/>
82
      <import plugin="org.eclipse.swt"/>
83
      <import plugin="org.eclipse.core.expressions" version="3.4.600" match="greaterOrEqual"/>
84
      <import plugin="org.txm.core" version="0.8.0" match="greaterOrEqual"/>
85
      <import plugin="org.txm.rcp" version="0.8.0" match="greaterOrEqual"/>
86
      <import plugin="org.txm.utils"/>
87 71
      <import plugin="org.txm.progression.rcp" version="1.0.0" match="greaterOrEqual"/>
88
      <import plugin="org.txm.chartsengine.core" version="1.0.0" match="greaterOrEqual"/>
89 72
      <import plugin="org.txm.chartsengine.jfreechart.rcp" version="1.0.0" match="greaterOrEqual"/>
90 73
      <import plugin="org.txm.chartsengine.r.rcp" version="1.0.0" match="greaterOrEqual"/>
91
      <import plugin="org.txm.lexicaltable.rcp" version="1.0.0" match="greaterOrEqual"/>
92
      <import plugin="org.txm.edition.rcp" version="1.0.0" match="greaterOrEqual"/>
93 74
      <import plugin="org.txm.annotation.rcp"/>
94 75
   </requires>
95 76

  
tmp/org.txm.treetagger.files.feature/feature.xml (revision 911)
69 69
   </license>
70 70

  
71 71
   <requires>
72
      <import plugin="org.txm.utils"/>
73
      <import plugin="org.eclipse.core.runtime" version="3.10.0" match="greaterOrEqual"/>
74 72
      <import plugin="org.txm.core" version="0.8.0" match="greaterOrEqual"/>
75 73
      <import plugin="org.txm.treetagger.core" version="1.0.0" match="greaterOrEqual"/>
76
      <import plugin="org.eclipse.ui"/>
77
      <import plugin="org.eclipse.swt"/>
78
      <import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/>
79 74
      <import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/>
80
      <import plugin="org.txm.libs.groovy-all" version="2.3.3" match="greaterOrEqual"/>
81
      <import plugin="org.txm.utils" version="1.0.0" match="greaterOrEqual"/>
82 75
   </requires>
83 76

  
84 77
   <plugin
tmp/org.txm.tigersearch.feature/feature.xml (revision 911)
17 17
   </license>
18 18

  
19 19
   <requires>
20
      <import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/>
21
      <import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/>
22
      <import plugin="org.eclipse.ui"/>
23
      <import plugin="org.eclipse.core.runtime"/>
24
      <import plugin="org.eclipse.ui.editors" version="3.8.200" match="greaterOrEqual"/>
25
      <import plugin="org.eclipse.swt"/>
26
      <import plugin="org.txm.searchengine.core" version="1.0.0" match="greaterOrEqual"/>
27
      <import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/>
28
      <import plugin="org.txm.statsengine.core" version="1.0.0" match="greaterOrEqual"/>
29
      <import plugin="org.txm.statsengine.r.core" version="1.0.0" match="greaterOrEqual"/>
30
      <import plugin="org.txm.statsengine.r.rcp" version="1.0.0" match="greaterOrEqual"/>
31
      <import plugin="org.txm.index.core" version="1.0.0" match="greaterOrEqual"/>
32 20
      <import plugin="org.txm.index.rcp" version="1.0.0" match="greaterOrEqual"/>
33
      <import plugin="org.txm.utils"/>
34
      <import plugin="org.txm.groovy.core" version="1.0.0" match="greaterOrEqual"/>
35 21
   </requires>
36 22

  
37 23
   <plugin
tmp/org.txm.cql2lsa.feature/feature.xml (revision 911)
17 17
   </license>
18 18

  
19 19
   <requires>
20
      <import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/>
21 20
      <import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/>
22
      <import plugin="org.eclipse.ui"/>
23
      <import plugin="org.eclipse.core.runtime"/>
24
      <import plugin="org.eclipse.ui.editors" version="3.8.100" match="greaterOrEqual"/>
25
      <import plugin="org.eclipse.core.expressions" version="3.4.500" match="greaterOrEqual"/>
26
      <import plugin="org.txm.index.core"/>
27
      <import plugin="org.txm.lexicaltable.core"/>
28
      <import plugin="org.txm.statsengine.r.core"/>
29 21
   </requires>
30 22

  
31 23
   <plugin
tmp/org.txm.tigersearch.rcp/.settings/org.eclipse.jdt.groovy.core.prefs (revision 911)
1 1
eclipse.preferences.version=1
2
groovy.compiler.level=23
2
groovy.compiler.level=-1
3
groovy.script.filters=**/*.dsld,y,**/*.gradle,n
tmp/org.txm.tigersearch.rcp/META-INF/MANIFEST.MF (revision 911)
131 131
 ls.debugger,org.mozilla.javascript.tools.idswitch,org.mozilla.javascr
132 132
 ipt.tools.jsc,org.mozilla.javascript.tools.shell,org.relaxng.datatype
133 133
 ,org.relaxng.datatype.helpers,org.txm.export.ts,org.txm.function.tige
134
 rsearch,org.txm.importer.srcmf,org.txm.importer.tigersearch,org.txm.s
134
 rsearch,org.txm.s
135 135
 earchengine.ts,org.txm.test,org.txm.tigersearch.commands,org.txm.tige
136 136
 rsearch.editors,org.w3c.css.sac,org.w3c.css.sac.helpers,org.w3c.dom.s
137 137
 mil,org.w3c.dom.svg,tigerAPI,tigerAPI.converters,tigerAPI.theories.ho
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TSImport.groovy (revision 911)
1
package org.txm.importer.tigersearch;
2

  
3
import java.io.File;
4
import java.util.ArrayList;
5

  
6
import ims.tiger.index.writer.*
7
import ims.tiger.system.*
8

  
9
import org.txm.Toolbox;
10
import org.txm.importer.ApplyXsl2;
11
import org.txm.importer.xtz.*
12
import org.txm.objects.BaseParameters
13
import org.txm.utils.BundleUtils;
14
import org.txm.utils.io.FileCopy;
15
import org.apache.log4j.BasicConfigurator;
16

  
17
/**
 * Import module for TIGER-XML sources.
 *
 * The TIGER-XML master file is converted with the "ts.xsl" stylesheet, the
 * result is imported with the usual XTZ import, and the TIGERSearch indexes
 * are then built in the "tiger" directory of the binary corpus.
 */
class TSImport extends XTZImport {

	public TSImport(BaseParameters params) {
		super(params);
	}

	/**
	 * Configures the import steps: the importer only builds metadata, there is
	 * no annotation step, and the XTZ compiler and pager are used as they are.
	 */
	@Override
	public void init(BaseParameters p) {
		super.init(p);

		importer = new TSImporter(this); // only to build metadata
		compiler = new XTZCompiler(this)
		annotater = null; // no annotater step to do
		pager = new XTZPager(this)
	}

	/**
	 * Do a XTZ Import then build the TIGERSearch indexes in the binary corpus "tiger" directory.
	 *
	 * Sets isSuccessful to false and returns early when no XML source file is
	 * found or when the XSL transformation fails or produces nothing. A failure
	 * of the TIGERSearch index build also sets isSuccessful to false.
	 */
	@Override
	public void start() throws InterruptedException {
		File tigerSrcDir = sourceDirectory

		// Collect the candidate TIGER-XML files: regular, visible "*.xml" files
		// other than the "import.xml" parameters file.
		List<File> xmlFiles = []
		File[] listing = sourceDirectory.listFiles()
		if (listing != null) { // listFiles() returns null when the directory cannot be listed
			for (File file : listing) {
				if (file.isDirectory() || file.isHidden()) continue
				String fileName = file.getName()
				if (fileName.equals("import.xml") || !fileName.endsWith(".xml")) continue
				xmlFiles << file
			}
		}
		// listFiles() guarantees no order: sort by name so that the file chosen
		// as master is the same on every platform.
		xmlFiles.sort { it.getName() }

		if (xmlFiles.size() == 0) {
			println "Error no XML file found in $sourceDirectory"
			isSuccessful = false;
			return;
		}

		File master = xmlFiles[0];
		println "Main TIGER XML file found: $master"

		// Install the TIGER-XML to XML-TXM stylesheet in the TXM home "xsl" directory
		File tsXSLFile = new File(Toolbox.getTXMHOMEPATH(), "xsl/ts.xsl");
		BundleUtils.copyFiles("TIGERSearchRCP", "src", "org/txm/importer/tigersearch", "ts.xsl", tsXSLFile.getParentFile());

		File xmltxmSrcDir = new File(binaryDirectory, "src"); // output directory of the TS XSL transformation
		xmltxmSrcDir.mkdirs();
		FileCopy.copy(master, new File(xmltxmSrcDir, master.getName()));

		// The transformation reads from and writes to the same directory
		if (!ApplyXsl2.processImportSources(tsXSLFile, xmltxmSrcDir, xmltxmSrcDir)) {
			println "Error while applying TS XSL file to $tigerSrcDir"
			isSuccessful = false;
			return;
		}

		File[] files = xmltxmSrcDir.listFiles();
		if (files == null || files.length == 0) {
			println "Error while applying TS XSL file: $xmltxmSrcDir is empty"
			isSuccessful = false;
			return;
		}

		sourceDirectory = xmltxmSrcDir; // from now on the XTZ import reads the transformed files
		File txmDir = new File(binaryDirectory, "txm/"+corpusName);
		txmDir.mkdirs();
		FileCopy.copyFiles(sourceDirectory, txmDir) // the compiler step will use these files

		super.start(); // call the usual XTZ import

		if (!isSuccessful) {
			return;
		}

		File tigerDir = new File(binaryDirectory, "tiger");
		tigerDir.mkdir();

		// Write the log4j configuration file of the TIGERSearch suite
		File logprop = new File(tigerDir, "tigersearch.logprop");
		logprop.withWriter("UTF-8") { writer ->
			writer.write("""# Default log configuration of the TIGERSearch suite
log4j.rootLogger=WARN,Logfile
log4j.logger.ims.tiger.gui.tigersearch.TIGERSearch=WARNING
log4j.appender.Logfile=org.apache.log4j.RollingFileAppender
log4j.appender.Logfile.File=\${user.home}/tigersearch/tigersearch.log
log4j.appender.Logfile.MaxFileSize=500KB
log4j.appender.Logfile.MaxBackupIndex=1
log4j.appender.Logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.Logfile.layout.ConversionPattern=%5r %-5p [%t] %c{2} - %m%n""")
		}

		BasicConfigurator.configure();
		String uri = master.getAbsolutePath(); // the index is built from the original TIGER-XML file
		File tigerBinDir = new File(tigerDir, corpusName)
		tigerBinDir.mkdir()
		try {
			IndexBuilderErrorHandler handler = new SimpleErrorHandler(tigerBinDir.getAbsolutePath());
			XMLIndexing indexing = new XMLIndexing(corpusName, uri, tigerBinDir.getAbsolutePath(), handler, false);
			indexing.startIndexing();
		}
		catch (Exception e) {
			// Do not report a successful import when the TIGERSearch indexes
			// could not be built, and keep the full cause for diagnosis
			// (getMessage() alone may be null).
			println "Error while building the TIGERSearch indexes of $corpusName: $e"
			e.printStackTrace()
			isSuccessful = false;
		}
	}
}
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TigerXML.xsd (revision 911)
1
<?xml version="1.0" encoding="UTF-8"?>
2
  <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
3

  
4
 <!-- ==================================================================
5
      XML Schema for the TIGER-XML format
6
      http://www.ims.uni-stuttgart.de/projekte/TIGER/public/TigerXML.xsd
7
      ==================================================================
8
      TIGER Project, Wolfgang Lezius
9
      IMS, University of Stuttgart, 04/01/2003
10
      ================================================================== -->
11

  
12

  
13
  <!-- ======================================================
14
       INCLUDES DECLARATION OF THE HEADER
15
       ====================================================== -->
16
  <xs:include schemaLocation="TigerXMLHeader.xsd"/>
17

  
18

  
19
  <!-- ======================================================
20
       INCLUDES DECLARATION OF SUBCORPORA AND SENTENCES
21
       ====================================================== -->
22
  <xs:include schemaLocation="TigerXMLSubcorpus.xsd"/>
23

  
24

  
25
  <!-- ======================================================
26
       DECLARATION OF THE CORPUS DOCUMENT
27
       ====================================================== -->
28

  
29
  <!-- declaration of the root element: corpus -->
30

  
31
  <xs:element name="corpus">
32
  
33
    
34
  
35
        <xs:complexType>
36

  
37
      <xs:sequence>
38

  
39
        <xs:choice>           
40
           <!-- header of the document is optional -->
41
           <xs:element name="head" type="headType" minOccurs="0" maxOccurs="1" />
42
        </xs:choice>
43

  
44
        <xs:element name="body" type="bodyType" minOccurs="1" maxOccurs="1" />
45

  
46
      </xs:sequence>
47

  
48
      <!-- corpus ID -->
49
      <xs:attribute name="id" type="idType" use="required" />
50

  
51
      <!-- optional attribute: TigerXML version; used by TIGERSearch only -->
52
      <xs:attribute name="version" type="xs:string" use="optional" />
53

  
54
    </xs:complexType>
55
    </xs:element>
56

  
57

  
58
  <!-- declaration of the body type -->
59

  
60
  <xs:complexType name="bodyType">
61

  
62
    <xs:choice minOccurs="1" maxOccurs="unbounded">
63
      <xs:element name="subcorpus" type="subcorpusType" minOccurs="1" maxOccurs="1"/>
64
      <xs:element name="s" type="sentenceType" minOccurs="1" maxOccurs="1"/>
65
    </xs:choice>
66

  
67
  </xs:complexType>
68

  
69

  
70
</xs:schema>
0 71

  
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/tigersearchLoader.groovy (revision 911)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
// $LastChangedDate: 2015-06-03 15:04:53 +0200 (mer., 03 juin 2015) $
23
// $LastChangedRevision: 2984 $
24
// $LastChangedBy: mdecorde $
25
//
26
package org.txm.importer.tigersearch;
27

  
28
import javax.xml.stream.XMLStreamReader;
29

  
30
import org.txm.sw.RemoveTag;
31
import org.txm.importer.ApplyXsl2;
32
import org.txm.importer.ValidateXml;
33
import org.txm.objects.*;
34
import org.txm.tokenizer.TokenizerClasses;
35
import org.txm.utils.*;
36
import org.txm.*;
37
import org.txm.scripts.teitxm.*;
38
import org.txm.utils.i18n.*;
39
import org.txm.metadatas.*;
40
import javax.xml.stream.*;
41
import org.w3c.dom.Element
42
import org.txm.utils.xml.DomUtils;
43
import org.txm.importer.xtz.*
44

  
45
// Home directory of the current user; base of the development-mode paths below.
String userDir = System.getProperty("user.home");

def MONITOR;
boolean debug = org.txm.utils.logger.Log.isPrintingErrors();
BaseParameters params;

try {
	// "paramsBinding" and "monitor" are not declared in this script: they are
	// presumably supplied by the caller through the script binding.
	params = paramsBinding;
	MONITOR = monitor
}
catch (Exception ignored) {
	// Reading the undeclared names failed: fall back to a development setup.
	println "DEV MODE";
	debug = true
	params = new BaseParameters(new File(userDir, "xml/roland/import.xml"))
	params.load()

	boolean toolboxReady = org.txm.Toolbox.isInitialized()
	if (!toolboxReady) {
		TokenizerClasses.loadFromNode(params.getTokenizerElement(params.getCorpusElement()));

		// Linux locations; on Windows use "C:\\Program Files\\TXM",
		// "C:\\Program Files\\treetagger" and "C:\\Program Files\\treetagger\\models".
		Toolbox.setParam(Toolbox.INSTALL_DIR, new File("/usr/lib/TXM"));
		Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH, new File(userDir, "treetagger"));
		Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH, new File(userDir, "treetagger/models"));

		// Format of the metadata CSV file
		Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8");
		Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ",");
		Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\"");

		Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM"));
	}
}

if (params == null) {
	println "no parameters. Aborting";
	return;
}

// Optional overrides, disabled by default:
//params.getKeyValueParameters().put(ImportKeys.CLEAN, "false")
//params.getKeyValueParameters().put(ImportKeys.MULTITHREAD, "false")
//params.getKeyValueParameters().put(ImportKeys.DEBUG, "false")
//params.getKeyValueParameters().put(ImportKeys.UPDATECORPUS, "false")

// Run the TIGERSearch import and publish its outcome in "readyToLoad".
TSImport tsImport = new TSImport(params);
tsImport.process();
readyToLoad = tsImport.isSuccessful
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/ts.xsl (revision 911)
1
<?xml version="1.0"?>
2
<xsl:stylesheet version="1.0"
3
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
4
                xmlns:tei="http://www.tei-c.org/ns/1.0"
5
                xmlns:xd="http://www.pnp-software.com/XSLTdoc"
6
                xmlns:edate="http://exslt.org/dates-and-times"
7
                exclude-result-prefixes="edate xd">
8

  
9
  <xd:doc type="stylesheet">
10

  
11
    <xd:short>
12
      Feuille de transformation du format TIGER-XML vers le format XML-TXM
13
    </xd:short>
14

  
15
    <xd:detail>
16
      This stylesheet is free software; you can redistribute it and/or
17
      modify it under the terms of the GNU Lesser General Public
18
      License as published by the Free Software Foundation; either
19
      version 3 of the License, or (at your option) any later version.
20
      
21
      This stylesheet is distributed in the hope that it will be useful,
22
      but WITHOUT ANY WARRANTY; without even the implied warranty of
23
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24
      Lesser General Public License for more details.
25
      
26
      You should have received a copy of GNU Lesser Public License with
27
      this stylesheet. If not, see http://www.gnu.org/licenses/lgpl.html
28
    </xd:detail>
29

  
30
    <xd:author>Matthieu Decorde, matthieu.decorde AT ens-lyon.fr</xd:author>
31
    <xd:author>Serge Heiden, slh AT ens-lyon.fr</xd:author>
32
    <xd:author>Alexey Lavrentev, alexei.lavrentev AT ens-lyon.fr</xd:author>
33

  
34
    <xd:copyright>2016, ENS de Lyon/CNRS (UMR IHRIM Cactus)</xd:copyright>
35

  
36
  </xd:doc>
37
  
38
  <xsl:output 
39
   method="xml"
40
   encoding="UTF-8"
41
   indent="yes" />
42

  
43
  <xsl:template match="corpus">
44
    <corpus>
45
    	<xsl:choose>
46
    		<xsl:when test="subcorpus">
47
    			<xsl:apply-templates select="subcorpus"/>
48
    		</xsl:when>
49
    		<xsl:otherwise>
50
    			<text>
51
    				<xsl:apply-templates select="s"/>
52
    			</text>			
53
    		</xsl:otherwise>
54
    	</xsl:choose>
55
    	<xsl:apply-templates/>
56
    </corpus>
57
  </xsl:template>
58
  
59
  <xsl:template match="subcorpus">
60
    <text>
61
	<xsl:attribute name="name"><xsl:value-of select="@name"/></xsl:attribute>
62
	<xsl:apply-templates select="s"/>
63
    </text>
64
  </xsl:template>
65
  
66
  <xsl:template match="s">
67
	<p>
68
	  <s>
69
	    <xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
70
	      <xsl:apply-templates select="graph/terminals/t" />
71
	  </s>
72
	</p>
73
  </xsl:template>
74

  
75
  <xsl:template match="t">
76
		<w>
77
			<xsl:for-each select="@*[not(name()='word')]">
78
				<xsl:copy/>
79
			</xsl:for-each>
80
	        <xsl:value-of select="@word"/>
81
		</w>
82
  </xsl:template>
83

  
84
</xsl:stylesheet>
0 85

  
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TSImporter.groovy (revision 911)
1
package org.txm.importer.tigersearch
2

  
3
import org.txm.Toolbox
4
import org.txm.importer.xtz.ImportModule;
5
import org.txm.importer.xtz.XTZImporter
6
import org.txm.metadatas.Metadatas
7
import org.txm.utils.io.FileCopy
8

  
9
/**
 * Importer step of the TIGERSearch import.
 *
 * It only builds the Metadatas object, since all the XML-TXM files already
 * exist. Metadatas is used to build the text order.
 *
 * @author mdecorde
 */
class TSImporter extends XTZImporter {

	public TSImporter(ImportModule module) {
		super(module);
	}

	/**
	 * Copies "metadata.csv" from the input directory to the binary directory,
	 * when it exists, and loads it into the metadata field.
	 *
	 * Returns without setting isSuccessFul when the copy fails.
	 */
	@Override
	public void process() {
		File binaryDir = module.getBinaryDirectory();

		// prepare metadata if any
		File sourceCsv = new File(inputDirectory, "metadata.csv");
		println sourceCsv

		if (sourceCsv.exists()) {
			File targetCsv = new File(binaryDir, "metadata.csv")
			boolean copied = FileCopy.copy(sourceCsv, targetCsv)
			if (!copied) {
				println "Error: could not create a copy of the metadata file "+sourceCsv.getAbsoluteFile();
				return;
			}

			// CSV format settings read from the Toolbox preferences
			def encoding = Toolbox.getPreference(Toolbox.METADATA_ENCODING)
			def columnSeparator = Toolbox.getPreference(Toolbox.METADATA_COLSEPARATOR)
			def textSeparator = Toolbox.getPreference(Toolbox.METADATA_TXTSEPARATOR)
			this.metadata = new Metadatas(targetCsv, encoding, columnSeparator, textSeparator, 1)
		}

		isSuccessFul = true;
	}
}
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TigerXMLSubcorpus.xsd (revision 911)
1
<?xml version="1.0" encoding="UTF-8"?>
2
  <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
3

  
4
 <!-- ===========================================================================
5
      XML Schema for the subcorpus part of the TIGER-XML format
6
      http://www.ims.uni-stuttgart.de/projekte/TIGER/public/TigerXMLSubcorpus.xsd
7
      ===========================================================================
8
      TIGER Project, Wolfgang Lezius
9
      IMS, University of Stuttgart, 04/01/2003
10
      =========================================================================== -->
11

  
12
  <!-- ======================================================
13
       DECLARATION OF SUBCORPORA AND SENTENCES
14
       ====================================================== -->
15

  
16

  
17
  <!-- declaration of the subcorpus element -->
18

  
19
  <xs:element name="subcorpus" type="subcorpusType"/>
20

  
21

  
22
  <!-- declaration of the subcorpus type -->
23

  
24
  <xs:complexType name="subcorpusType">
25

  
26
    <!-- A subcorpus may comprise other subcorpora or sentences -->
27

  
28
    <xs:choice minOccurs="0" maxOccurs="unbounded">
29
      <xs:element name="subcorpus" type="subcorpusType" minOccurs="1" maxOccurs="1"/>
30
      <xs:element name="s" type="sentenceType" minOccurs="1" maxOccurs="1"/>
31
    </xs:choice>
32

  
33
    <!-- required: subcorpus name -->
34
 
35
    <xs:attribute name="name" type="xsd:string" use="required"/>
36

  
37
    <!-- optional: reference to external subcorpus file 
38

  
39
         A subcorpus of a TigerXML corpus can also be stored in separate file. 
40
         This attribute points to the external subcorpus file. The pointer is
41
         a URI. Examples: file:relative.xml or file:/path/to/absolute.xml 
42

  
43
         Note: If there is a pointer to an external file, the subcorpus
44
               element must be empty. -->
45

  
46
    <xs:attribute name="external" type="xsd:anyURI"/>  
47

  
48
  </xs:complexType>
49

  
50

  
51
  <!-- declaration of the sentence type -->
52

  
53
  <xs:complexType name="sentenceType">
54

  
55
    <xs:sequence>
56
      <xs:element name="graph" type="graphType" minOccurs="0" maxOccurs="1"/>
57
      <xs:element name="matches" type="matchesType" minOccurs="0" maxOccurs="1"/>
58
    </xs:sequence>
59

  
60
    <xs:attribute name="id" type="idType" use="required"/>
61

  
62
  </xs:complexType>
63

  
64

  
65
  <!-- declaration of the graph type -->
66

  
67
  <xs:complexType name="graphType">
68

  
69
    <xs:sequence>
70
      <xs:element name="terminals" type="terminalsType" minOccurs="1" maxOccurs="1"/>
71
      <xs:element name="nonterminals" type="nonterminalsType" minOccurs="1" maxOccurs="1"/>
72
    </xs:sequence>
73

  
74
    <xs:attribute name="root" type="idrefType" use="required"/>
75

  
76
    <!-- indicates that the exported sentence is discontinuous -->
77
    <xs:attribute name="discontinuous" type="xsd:boolean" default="false" use="optional"/>
78

  
79
  </xs:complexType>
80

  
81

  
82
  <!-- declaration of the terminals type -->
83

  
84
  <xs:complexType name="terminalsType">
85

  
86
    <xs:sequence>
87
      <xs:element name="t" type="tType" minOccurs="1" maxOccurs="unbounded"/>
88
    </xs:sequence>
89

  
90
  </xs:complexType>
91

  
92

  
93
  <!-- declaration of the t element -->
94

  
95
  <xs:complexType name="tType">
96

  
97
    <!-- secondary edges possible -->
98
    <xs:sequence>
99
      <xs:element name="secedge" type="secedgeType" minOccurs="0" maxOccurs="unbounded"/>
100
    </xs:sequence>
101

  
102
    <xs:attribute name="id" type="idType" use="required"/>    
103
    <xs:attributeGroup ref="tfeatureAttributes"/>
104

  
105
  </xs:complexType>
106

  
107

  
108
  <!-- declaration of the nonterminals type -->
109

  
110
  <xs:complexType name="nonterminalsType">
111

  
112
    <xs:sequence>
113
      <xs:element name="nt" type="ntType" minOccurs="0" maxOccurs="unbounded"/>
114
    </xs:sequence>
115

  
116
  </xs:complexType>
117

  
118

  
119
  <!-- declaration of the nt element -->
120

  
121
  <xs:complexType name="ntType">
122

  
123
    <!-- edge and secondary edges possible -->
124
    <xs:sequence>
125
      <xs:element name="edge" type="edgeType" minOccurs="0" maxOccurs="unbounded"/>
126
      <xs:element name="secedge" type="secedgeType" minOccurs="0" maxOccurs="unbounded"/>
127
    </xs:sequence>
128

  
129
    <xs:attribute name="id" type="idType" use="required"/>    
130
    <xs:attributeGroup ref="ntfeatureAttributes"/>
131

  
132
  </xs:complexType>
133

  
134

  
135
  <!-- declaration of the edge type -->
136

  
137
  <xs:complexType name="edgeType">
138

  
139
    <xs:attribute name="idref" type="idrefType" use="required"/>    
140

  
141
    <xs:attributeGroup ref="edgelabelAttribute"/>
142

  
143
  </xs:complexType>
144

  
145

  
146
  <!-- declaration of the secondary edge type -->
147

  
148
  <xs:complexType name="secedgeType">
149

  
150
    <xs:attribute name="idref" type="idrefType" use="required"/>    
151

  
152
    <xs:attributeGroup ref="secedgelabelAttribute"/>
153

  
154
  </xs:complexType>
155

  
156

  
157
  <!-- declaration of the matches type -->
158

  
159
  <xs:complexType name="matchesType">
160

  
161
    <xs:sequence>
162
      <xs:element name="match" type="matchType" minOccurs="1" maxOccurs="unbounded"/>
163
    </xs:sequence>
164

  
165
  </xs:complexType>
166

  
167

  
168
  <!-- declaration of the match type -->
169

  
170
  <xs:complexType name="matchType">
171

  
172
    <xs:sequence>
173
      <xs:element name="variable" type="varType" minOccurs="1" maxOccurs="unbounded"/>
174
    </xs:sequence>
175

  
176
    <xs:attribute name="subgraph" type="idrefType" use="required"/>    
177

  
178
  </xs:complexType>
179

  
180

  
181
  <!-- declaration of the variable type -->
182

  
183
  <xs:complexType name="varType">
184

  
185
    <xs:attribute name="name" type="xsd:string" use="required"/>    
186

  
187
    <xs:attribute name="idref" type="idrefType" use="required"/>    
188

  
189
  </xs:complexType>
190

  
191

  
192
  <!-- ======================================================
193
       SENTENCE DECLARATIONS THAT SHOULD BE REFINED
194
       ====================================================== -->
195

  
196
  <!-- declaration of the TERMINAL FEATURE ATTRIBUTES;
197
       this group is unrestricted, but should be refined by a 
198
       specialised, corpus-dependent schema -->
199

  
200
  <xs:attributeGroup name="tfeatureAttributes">
201
  
202
    <xs:anyAttribute processContents="skip"/>
203

  
204
  </xs:attributeGroup>
205

  
206

  
207
  <!-- declaration of the NONTERMINAL FEATURE ATTRIBUTES;
208
       this group is unrestricted, but should be refined by a 
209
       specialised, corpus-dependent schema -->
210

  
211
  <xs:attributeGroup name="ntfeatureAttributes">
212
  
213
    <xs:anyAttribute processContents="skip"/>
214

  
215
  </xs:attributeGroup>
216

  
217

  
218
  <!-- declaration of the EDGE-LABEL ATTRIBUTE;
219
       the label attribute is optional which should be refined by a 
220
       specialised, corpus-dependent schema -->
221

  
222
  <xs:attributeGroup name="edgelabelAttribute">
223
  
224
    <xs:attribute name="label" type="xsd:string" use="optional"/>    
225

  
226
  </xs:attributeGroup>
227
    
228

  
229
  <!-- declaration of the SECONDARY-EDGE-LABEL ATTRIBUTE;
230
       the label attribute is optional which should be refined by a 
231
       specialised, corpus-dependent schema -->
232

  
233
  <xs:attributeGroup name="secedgelabelAttribute">
234
  
235
    <xs:attribute name="label" type="xsd:string" use="optional"/>    
236

  
237
  </xs:attributeGroup>
238
 
239

  
240
  <!-- ======================================================
241
       ID and IDREF TYPE DECLARATIONS
242
       ====================================================== -->
243

  
244
  <!-- Even though XML Schema are a W3C Recommendation, schema
245
       support of XML parsers is still restricted. Using some
246
       parsers you might have problems with the ID and IDREF
247
       attributes in combination with an "anyAttribute"
248
       declaration. In this case, just modify the base type 
249
       of the following two declarations to "xsd:string".  -->
250

  
251

  
252
  <!-- declaration of idType -->
253

  
254
  <xs:simpleType name="idType">
255

  
256
    <xs:restriction base="xsd:ID"/>
257

  
258
  </xs:simpleType>
259

  
260

  
261
  <!-- declaration of idrefType -->
262

  
263
  <xs:simpleType name="idrefType">
264

  
265
    <xs:restriction base="xsd:IDREF"/>
266

  
267
  </xs:simpleType>
268

  
269

  
270
</xs:schema>
0 271

  
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/package.html (revision 911)
1
<html>
2
<body>
3
<p>TIGERSearch import module. This is a prototype that can only manage SRCMF TIGERSearch sources.</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/tigersearch/TigerXMLHeader.xsd (revision 911)
1
<?xml version="1.0" encoding="UTF-8"?>
2
  <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
3

  
4
 <!-- =======================================================================
5
      XML SubSchema for the header part of the TIGER-XML format
6
      http://www.ims.uni-stuttgart.de/projekte/TIGER/public/TigerXMLHeader.xsd
7
      =======================================================================
8
      TIGER Project, Wolfgang Lezius 
9
      IMS, University of Stuttgart, 04/01/2003
10
      ======================================================================= -->
11

  
12

  
13
  <!-- ======================================================
14
       DECLARATION OF THE HEADER
15
       ====================================================== -->
16

  
17

  
18
  <!-- declaration of the head element -->
19

  
20
  <xs:element name="head" type="headType"/>
21

  
22

  
23
  <!-- declaration of the header type -->
24

  
25
  <xs:complexType name="headType">
26

  
27
     <xs:sequence>
28
        <xs:element name="meta" type="metaType" minOccurs="0" maxOccurs="1"/>
29
        <xs:element name="annotation" type="annotationType" minOccurs="0" maxOccurs="1"/>
30
     </xs:sequence>    
31

  
32
     <!-- optional: reference to external header file 
33

  
34
          The header of a TigerXML corpus can also be stored in separate file. 
35
          This attribute points to the external header file. The pointer is
36
          a URI. Examples: file:relative.xml or file:/path/to/absolute.xml
37

  
38
          Note: If there is a pointer to an external file, the head
39
                element must be empty. -->
40

  
41
     <xs:attribute name="external" type="xsd:anyURI"/>  
42

  
43
  </xs:complexType>
44

  
45

  
46
  <!-- declaration of the meta information type -->
47

  
48
  <xs:complexType name="metaType">
49

  
50
    <xs:sequence>
51
      <xs:element name="name" type="xsd:string" minOccurs="0" maxOccurs="1"/>
52
      <xs:element name="author" type="xsd:string" minOccurs="0" maxOccurs="1"/>
53
      <xs:element name="date" type="xsd:string" minOccurs="0" maxOccurs="1"/>
54
      <xs:element name="description" type="xsd:string" minOccurs="0" maxOccurs="1"/>
55
      <xs:element name="format" type="xsd:string" minOccurs="0" maxOccurs="1"/>
56
      <xs:element name="history" type="xsd:string" minOccurs="0" maxOccurs="1"/>
57
    </xs:sequence>    
58

  
59
  </xs:complexType>
60
  
61

  
62
  <!-- declaration of the annotation type -->
63

  
64
  <xs:complexType name="annotationType">
65

  
66
    <xs:sequence>
67
      <xs:element name="feature" type="featureType" minOccurs="1" maxOccurs="unbounded"/>
68
      <xs:element name="edgelabel" type="edgelabelType" minOccurs="0" maxOccurs="1"/>
69
      <xs:element name="secedgelabel" type="edgelabelType" minOccurs="0" maxOccurs="1"/>
70
    </xs:sequence>
71

  
72
  </xs:complexType>
73

  
74

  
75
  <!-- declaration of the feature type -->
76

  
77
  <xs:complexType name="featureType">
78

  
79
    <xs:sequence>
80
       <xs:element name="value" type="featurevalueType" minOccurs="0" maxOccurs="unbounded"/>
81
    </xs:sequence>
82
    
83
    <xs:attribute name="name" type="featurenameType" use="required"/>
84

  
85
    <xs:attribute name="domain" use="required">
86
       <xs:simpleType>
87
         <xs:restriction base="xsd:string">
88
           <xs:enumeration value="T"/>     <!-- feature for terminal nodes -->
89
           <xs:enumeration value="NT"/>    <!-- feature for nonterminal nodes -->
90
           <xs:enumeration value="FREC"/>  <!-- feature for both -->
91
         </xs:restriction>
92
       </xs:simpleType>
93
    </xs:attribute>
94

  
95
  </xs:complexType>
96

  
97

  
98
  <!-- declaration of the (secondary) edge label type -->
99

  
100
  <xs:complexType name="edgelabelType">
101

  
102
    <xs:sequence>
103
       <xs:element name="value" type="featurevalueType" minOccurs="0" maxOccurs="unbounded"/>
104
    </xs:sequence>
105
    
106
  </xs:complexType>
107

  
108

  
109
  <!-- declaration of the feature value type -->
110

  
111
  <xs:complexType name="featurevalueType">
112

  
113
    <xs:simpleContent>   <!-- element content: documentation of the feature value -->
114
      <xs:extension base="xsd:string">
115
        <xs:attribute name="name" type="xsd:string"/>
116
      </xs:extension>
117
    </xs:simpleContent>
118

  
119

  
120
  </xs:complexType>
121

  
122

  
123
  <!-- ======================================================
124
       HEADER DECLARATIONS THAT SHOULD BE REFINED
125
       ====================================================== -->
126

  
127
  <!-- declaration of the FEATURE NAMES used in the corpus header;
128
       this type is unrestricted, but should be refined by a 
129
       specialised, corpus-dependent schema -->
130

  
131
  <xs:simpleType name="featurenameType">
132

  
133
    <xs:restriction base="xsd:string">
134
      <xs:minLength value="1"/>
135
      <xs:maxLength value="20"/>
136
      <xs:whiteSpace value="preserve"/>
137
    </xs:restriction>
138

  
139
  </xs:simpleType>
140

  
141

  
142
</xs:schema>
0 143

  
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/package.html (revision 911)
1
<html>
2
<body>
3
<p>TIGERSearch import module. This is a prototype that can only manage SRCMF TIGERSearch sources.</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/MasterReheader.groovy (revision 911)
1
#! /usr/bin/groovy
2
package org.txm.importer.srcmf;
3

  
4
/*
5
 * To change this template, choose Tools | Templates
6
 * and open the template in the editor.
7
 */
8

  
9
// Set up globals
10
// def masterFile = new File('/home/tomr/Documents/Work/lyon12/srcmf/groovy/SrcmfImport/aucassin_surface/master_pos.xml')
11
// def outputFile = new File('/home/tomr/Documents/Work/lyon12/srcmf/groovy/SrcmfImport/aucassin_surface/master_pos2.xml')
12
// def headerFile = new File('/home/tomr/Documents/Work/SRCMF/srcmf_ts/header_srcmf.xml')
13
// def feats = [nt:['cat', 'type', 'coord'], t:['pos', 'form', 'q']]
14
// def firstFeat = [t:'word', nt:'cat']
15

  
16
// Command-line entry point
17
def cli = new CliBuilder(
18
    usage:'MasterReheader.groovy [options] master_file.xml header_file.xml'
19
)
20
cli.h(longOpt:'help', 'Prints this message.')
21
cli.o(longOpt:'output', args:1, argName:'outputfile.xml', 'Output to given file.')
22
cli.nt(longOpt:'ntfeats', args:1, argName:'ntfeats', 'NT features for which to provide value node.')
23
cli.t(longOpt:'tfeats', args:1, argName:'tfeats', 'T features for which  to provide value node.')
24
cli.nt1(longOpt:'ntfeat1st', args:1, argName:'1st ntfeat', 'First NT feature listed in header.')
25
cli.t1(longOpt:'tfeat1st', args:1, argName:'1st tfeat', 'First T feature listed in header.')
26
options = cli.parse(args)
27
if (options.arguments().size() == 2) {
28
    def masterFile = new File(options.arguments()[0])
29
    def headerFile = new File(options.arguments()[1])
30
    def masterFolder = masterFile.getCanonicalFile().getParent()
31
    def outputFile = null
32
    if (options.o) {
33
        outputFile = new File(options.o)
34
    } else {
35
        outputFile = new File(masterFolder, 'MasterReheader_out.xml')
36
    }
37
    def ntfirst = 'cat'
38
    if (options.nt1) {
39
        ntfirst = options.nt1
40
    }
41
    def tfirst = 'word'
42
    if (options.t1) {
43
        tfirst = options.t1
44
    }
45
    script(
46
        masterFile, headerFile, outputFile, 
47
        ['nt':options.nts, 't':options.ts],
48
        ['nt':options.nt1, 't':options.t1]
49
    )    
50
} else {
51
    println 'Incorrect number of command line arguments... exiting'
52
    println cli.usage()
53
}
54

  
55
def script(
56
    File masterFile, File headerFile, File outputFile, HashMap feats, HashMap firstFeat
57
) {
58
    // Load master and header files
59
    def master = new XmlParser().parse(masterFile)
60
    def header = new XmlParser().parse(headerFile)
61
    def masterFolder = masterFile.getCanonicalFile().getParent()
62

  
63
    // Set up locals
64
    def attrVal = [nt:[:], t:[:]]
65

  
66
    // Scan subcorpus files and build attribute lists.
67
    master.body.subcorpus.each { 
68
        def subcorpusFile = new File (masterFolder, it.'@external'[5..-1])
69
        def subcorpus = new XmlParser().parse(subcorpusFile)
70
        // Closure for t & nt nodes processing.
71
        def getvals = { node, type ->
72
            node.attributes().each { mEntry ->
73
                if (! attrVal[type].keySet().contains(mEntry.getKey())) {
74
                    attrVal[type][mEntry.getKey()] = new HashSet()
75
                }
76
                attrVal[type][mEntry.getKey()].add(mEntry.getValue())
77
            }
78
        }
79
        subcorpus.s.graph.terminals.t.each { getvals.call(it, 't') }
80
        subcorpus.s.graph.nonterminals.nt.each { getvals.call(it, 'nt') }
81
    }
82
    // Id isn't an attribute in the header.
83
    attrVal['t'].remove('id')
84
    attrVal['nt'].remove('id')
85
    // Remove old feature nodes in master file
86
    def oldFeatureNodes = master.head.annotation.feature
87
    while (oldFeatureNodes) {
88
        node = oldFeatureNodes.pop()
89
        node.parent().remove(node)
90
    }
91
    assert (! master.head.annotation.feature)
92
    // Check firstFeat was relevant
93
    ['t', 'nt'].each { type ->
94
        if (! (attrVal[type].keySet().contains(firstFeat[type]))) {
95
            firstFeat[type] = attrVal[type].keySet().sort()[0]
96
        }
97
    }
98
    assert attrVal['t'].keySet().contains(firstFeat['t'])
99
    assert attrVal['nt'].keySet().contains(firstFeat['nt'])
100
    def featList = [:]
101
    ['t', 'nt'].each { type ->
102
        featList[type] = [firstFeat[type]]
103
        featList[type].addAll(attrVal[type].keySet().findAll { it != firstFeat[type] })
104
    }
105
    // Add new feature and value nodes
106
    ['t', 'nt'].each { type ->
107
        featList[type].each { feat ->
108
            def fNode = new Node(master.head.annotation[0], 'feature', 
109
                ['domain':type.toUpperCase(), 'name':feat]
110
            )
111
            // Add value node if the node value is given in 'feats'
112
            if (feats[type].contains(feat)) {
113
                attrVal[type][feat].each { value ->
114
                    assert header.'**'.feature
115
                    assert header.'**'.feature[0].'@name'
116
                    assert header.'**'.feature[0].'@domain'
117
                    assert ['NT', 'T'].contains(header.'**'.feature[0].'@domain')
118
                    def hFNode = header.'**'.feature.find {
119
                        it.'@name' == feat && (
120
                            it.'@domain' == type.toUpperCase() || it.'@domain' == 'FREC'
121
                        )
122
                    }
123
                    def vText = '[unknown]'
124
                    if (hFNode && hFNode.value.find { it.'@name' == value }) {
125
                        vText = hFNode.value.find { it.'@name' ==  value }.text()
126
                    }
127
                    new Node(fNode, 'value', ['name':value], vText)
128
                }
129
            }
130
        }    
131
    }
132

  
133
    // Save to output_file
134
    outputFile.withWriter { writer ->
135
        writer << groovy.xml.XmlUtil.serialize(master)
136
    }
137
}
138

  
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/SrcmfImporter.groovy (revision 911)
1
/*
2
 * Calls all import scripts
3
 */
4

  
5
/**
6
 *
7
 * @author tmr
8
 */
9

  
10
// Command line form of import statements:
11
// import MasterReheader
12
// import PunctInjectImport
13
// import SubcorpusDataInject
14
// import TigerSubcorpus2Main
15
// import java.util.logging.FileHandler
16
// import javax.xml.parsers.DocumentBuilderFactory
17

  
18
// TXM package statement
19
package org.txm.importer.srcmf
20

  
21
import java.util.logging.*;
22

  
23

  
24
// Command line entry point
25
def cli = new CliBuilder(
26
    usage:'SrcmfImport.groovy [options] tiger_master.xml xml_txm.xml header_file.xml'
27
)
28
cli.h(longOpt:'help', 'Prints this message.')
29
options = cli.parse(args)
30
if (options.arguments().size() != 3) {
31
    println 'Incorrect number of command line arguments... exiting'
32
    println cli.usage()
33
    System.exit(2)
34
}
35

  
36
def tigerFile = new File(options.arguments()[0])
37
def txmFile = new File(options.arguments()[1])
38
def headerFile = new File(options.arguments()[2])
39
def tigerXmlAll = doAllButPnc(
40
    tigerFile, 
41
    txmFile, 
42
    headerFile,
43
    txmFile.getAbsoluteFile().getParentFile().getParentFile()
44
)
45
doPnc(tigerXmlAll, txmFile)
46
tigerXmlAll.delete()
47

  
48
def doAllButPnc(File tigerFile, File txmFile, File headerFile, File binDir) {
49
    // Run pos injection script
50
    File txmSrcDir = txmFile.getAbsoluteFile().getParentFile()
51
    File tigerDir = new File(binDir, "tiger")
52
    tigerDir.mkdir()
53
    File masterpos = new File(tigerDir, "master_pos.xml")
54
    File xmltxm = txmSrcDir.listFiles()[0]
55
    File logFile = new File(binDir, "tiger.log")
56
    def sdi = new SubcorpusDataInject(
57
        xmltxm, 
58
        new FileHandler(logFile.getAbsolutePath()), "vers"
59
    )
60
    sdi.processMaster(tigerFile, masterpos)
61
    // Run reheader script
62
    def reheader = new MasterReheader()
63
    File tmp = File.createTempFile("tmp", ".xml",tigerDir)
64
    def feats = ['nt':['cat', 'type', 'coord'], 't':['pos', 'form', 'q']]
65
    def firstFeat = ['nt':'cat', 't':'word']
66
    reheader.script(masterpos, headerFile, tmp, feats, firstFeat)
67
    if (!tmp.exists()) {
68
    	println "Error: reheader failed"
69
    }
70
    masterpos.delete()
71
    tmp.renameTo(masterpos)
72
    // Run merge master & subcorpus script
73
    def tigerXmlAll = new File(masterpos.getParentFile(), "TigerAll.xml")
74
    def mergescript = new TigerSubcorpus2Main()
75
    mergescript.script(masterpos, tigerXmlAll)
76
    return tigerXmlAll
77
}
78

  
79
def doPnc(File tigerXmlAll, File txmFile) {
80
    injector = new PunctInjectImport(tigerXmlAll, txmFile)
81
    injector.outputFile = new File(tigerXmlAll.getParentFile(), "TigerPnc.xml")
82
    injector.process()
83
}
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/SubcorpusDataInject.groovy (revision 911)
1
#! /usr/bin/groovy
2
package org.txm.importer.srcmf;
3
/*
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff