Revision 2112

tmp/org.txm.connlu.core/plugin.xml (revision 2112)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<?eclipse version="3.4"?>
3
<plugin>
4
   <extension
5
         point="org.eclipse.ui.menus">
6
      <menuContribution
7
            locationURI="menu:menu.file.import?before=menu.file.import.separator.software">
8
         <command
9
               commandId="org.txm.rcp.handlers.scripts.ExecuteImportScript"
10
               label="Connl-UD"
11
               style="push">
12
            <parameter
13
                  name="org.txm.rcp.commands.commandParameter3"
14
                  value="connlu/connluLoader.groovy">
15
            </parameter>
16
         </command>
17
      </menuContribution>
18
   </extension>
19
   <extension
20
         point="org.txm.PostTXMHOMEInstallationStep">
21
      <PostTXMHOMEInstallationStep
22
            class="org.txm.connlu.core.InstallGroovyCONNLUFiles"
23
            description="copy scripts for import module"
24
            name="CONNLUFiles">
25
      </PostTXMHOMEInstallationStep>
26
   </extension>
27
</plugin>
0 28

  
tmp/org.txm.connlu.core/.settings/org.eclipse.jdt.core.prefs (revision 2112)
1
eclipse.preferences.version=1
2
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
4
org.eclipse.jdt.core.compiler.compliance=1.7
5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
6
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
7
org.eclipse.jdt.core.compiler.source=1.7
0 8

  
tmp/org.txm.connlu.core/.settings/org.eclipse.jdt.groovy.core.prefs (revision 2112)
1
eclipse.preferences.version=1
2
groovy.compiler.level=25
0 3

  
tmp/org.txm.connlu.core/.classpath (revision 2112)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins">
5
		<accessrules>
6
			<accessrule kind="accessible" pattern="**"/>
7
		</accessrules>
8
	</classpathentry>
9
	<classpathentry kind="src" path="src"/>
10
	<classpathentry kind="src" path="groovy"/>
11
	<classpathentry exported="true" kind="con" path="GROOVY_SUPPORT"/>
12
	<classpathentry exported="true" kind="con" path="GROOVY_DSL_SUPPORT"/>
13
	<classpathentry kind="output" path="bin"/>
14
</classpath>
0 15

  
tmp/org.txm.connlu.core/META-INF/MANIFEST.MF (revision 2112)
1
Manifest-Version: 1.0
2
Bundle-ManifestVersion: 2
3
Bundle-Name: CONNLU
4
Bundle-SymbolicName: org.txm.connlu.core;singleton:=true
5
Bundle-Version: 1.0.0.qualifier
6
Automatic-Module-Name: org.txm.connlu.core
7
Bundle-RequiredExecutionEnvironment: JavaSE-1.7
8
Require-Bundle: org.txm.core;bundle-version="0.8.0";visibility:=reexport,
9
 org.txm.groovy.core;bundle-version="1.0.0";visibility:=reexport
0 10

  
tmp/org.txm.connlu.core/.project (revision 2112)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>org.txm.connlu.core</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.jdt.groovy.core.groovyNature</nature>
26
		<nature>org.eclipse.pde.PluginNature</nature>
27
		<nature>org.eclipse.jdt.core.javanature</nature>
28
	</natures>
29
</projectDescription>
0 30

  
tmp/org.txm.connlu.core/groovy/org/txm/scripts/importer/connlu/CONNLUImporter.groovy (revision 2112)
1
package org.txm.scripts.importer.connlu
2

  
3
import org.txm.Toolbox
4
import org.txm.importer.xtz.ImportModule;
5
import org.txm.metadatas.Metadatas
6
import org.txm.utils.io.FileCopy
7
import org.txm.utils.io.IOUtils
8
import org.txm.importer.xtz.*
9
import org.txm.scripts.importer.xtz.*
10
import org.txm.importer.ApplyXsl2;
11
import javax.xml.stream.*
12
import org.txm.utils.AsciiUtils
13

  
14
/**
 * Importer step of the CONNLU import module.
 *
 * The first CoNLL-U file found in the source directory is converted to XML files
 * (one per text, written in a "src" directory created next to the output directory),
 * then the regular XTZ importer step is run on those XML files.
 *
 * @author mdecorde
 */
class CONNLUImporter extends XTZImporter {

	/** File name suffixes recognized as CoNLL-U sources; ".connlu" is kept for backward compatibility. */
	private static final List<String> CONLLU_EXTENSIONS = [".connlu", ".conllu"]

	public CONNLUImporter(ImportModule module) {
		super(module);
	}

	/**
	 * Converts the CoNLL-U source to XML, then delegates to the XTZ importer
	 * using the directory of converted files as source directory.
	 */
	@Override
	public void process() {

		File srcDirectory = new File(outputDirectory.getParentFile(), "src")
		srcDirectory.mkdirs();

		// convert() is static and cannot update the instance state: the failure is recorded here
		if (!convert(inputDirectory, srcDirectory)) {
			isSuccessFul = false;
			return
		}

		inputDirectory = srcDirectory // switch source directory
		super.process();
	}

	/**
	 * Converts the first CoNLL-U file of a directory to XML files.
	 *
	 * One XML file is written per distinct "# text = " value; each "# sent_id = " value
	 * becomes a "s" element and each token line a "w" element whose attributes
	 * p1..pN hold the tab-separated columns and whose content is the second column.
	 *
	 * NOTE(review): the value of "# text = " is used as the text identifier; in the
	 * CoNLL-U format that comment usually holds the raw sentence text - confirm the
	 * expected source convention.
	 *
	 * @param inputDirectory the directory containing the CoNLL-U file
	 * @param srcDirectory the directory receiving the XML files
	 * @return false if no CoNLL-U file was found in inputDirectory, true otherwise
	 */
	public static boolean convert(File inputDirectory, File srcDirectory) {
		File master = null;
		def files = inputDirectory.listFiles()
		if (files != null) { // listFiles() returns null when the directory cannot be listed
			master = files.find { File f ->
				f.isFile() && CONLLU_EXTENSIONS.any { String ext -> f.getName().endsWith(ext) }
			}
		}

		if (master == null) {
			println "Aborting. No CONNLU file found in $inputDirectory."
			return false
		}

		String text_id = null;
		String sent_id = null;
		def content = [:]; // text id -> (sentence id -> list of token columns), in reading order
		master.eachLine("UTF-8") { line ->
			if (line.startsWith("# text = ")) {
				text_id = line.substring("# text = ".length())
			} else if (line.startsWith("# sent_id = ")) {
				sent_id = line.substring("# sent_id = ".length())
			} else if (line.trim().length() == 0 || line.startsWith("#")) {
				return // blank separator lines and other comment lines are not tokens
			} else if (text_id != null && sent_id != null) {
				String[] columns = line.split("\t")
				if (columns.length < 2) {
					return // the second column is needed for the word content
				}
				if (!content.containsKey(text_id)) {
					content[text_id] = [:]
				}
				def text = content[text_id]
				if (!text.containsKey(sent_id)) {
					text[sent_id] = []
				}
				text[sent_id] << columns
			}
		}

		XMLOutputFactory factory = XMLOutputFactory.newInstance();
		for (def textEntry : content.entrySet()) {
			// the file name is built from the text being written, not from the last id parsed
			File xmlFile = new File(srcDirectory, AsciiUtils.buildAttributeId(textEntry.getKey())+".xml")
			BufferedOutputStream output = new BufferedOutputStream(new FileOutputStream(xmlFile))
			XMLStreamWriter writer = null
			try {
				writer = factory.createXMLStreamWriter(output, "UTF-8");

				writer.writeStartDocument("UTF-8","1.0");
				writer.writeStartElement ("TEI");
				writer.writeDefaultNamespace("http://www.tei-c.org/ns/1.0");
				writer.writeNamespace("txm", "http://textometrie.org/1.0");
				writer.writeStartElement ("teiHeader");
				writer.writeEndElement()
				writer.writeStartElement ("text");
				for (def sentenceEntry : textEntry.getValue().entrySet()) {
					writer.writeStartElement ("s");
					for (def line : sentenceEntry.getValue()) {
						writer.writeStartElement ("w");
						for (int i = 0 ; i < line.size() ; i++) {
							writer.writeAttribute("p"+(i+1), line[i])
						}
						writer.writeCharacters(line[1])
						writer.writeEndElement() // w
						writer.writeCharacters(" ")
					}

					writer.writeEndElement() // s
					writer.writeCharacters("\n")
				}
				writer.writeEndElement() // text
				writer.writeEndElement() // TEI
				writer.writeEndDocument()
				writer.flush()
			} finally {
				try {
					if (writer != null) {
						writer.close() // does not close the underlying stream
					}
				} finally {
					output.close()
				}
			}
		}
		return true
	}
}
tmp/org.txm.connlu.core/groovy/org/txm/scripts/importer/connlu/connluLoader.groovy (revision 2112)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2015-06-03 15:04:53 +0200 (mer., 03 juin 2015) $
25
// $LastChangedRevision: 2984 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.scripts.importer.connlu;
29

  
30
import javax.xml.stream.*
31

  
32
import org.txm.*
33
import org.txm.metadatas.*
34
import org.txm.objects.*
35
import org.txm.importer.scripts.xmltxm.*
36
import org.txm.utils.*
37
import org.txm.utils.i18n.*
38
import org.txm.importer.xtz.*
39

  
40
// Entry point of the CONNLU import: runs a CONNLUImport on the project to import.
// 'projectBinding', 'monitor' and 'readyToLoad' are not declared in this script,
// so they are resolved through the script binding.
def MONITOR;
Project project;

try {
	project = projectBinding
	MONITOR = monitor
} catch (Exception ignored) {
	// a binding variable is missing: the null check below reports the missing project
}
if (project == null) { println "no project set. Aborting"; return; }

CONNLUImport i = new CONNLUImport(project);
/*
 * To customize the XTZ import, replace the importer, compiler, annotater or pager objects before calling process()
		i.importer = new XTZImporter(i)
		i.compiler = new XTZCompiler(i)
		i.annotater = new TTAnnotater(i);
		i.pager = new XTZPager(i)
 */
i.process();

// the corpus is ready to be loaded only if the import succeeded and the project was saved
readyToLoad = i.successFul && project.save()
tmp/org.txm.connlu.core/groovy/org/txm/scripts/importer/connlu/CONNLUImport.groovy (revision 2112)
1
package org.txm.scripts.importer.connlu;
2

  
3
import org.apache.log4j.BasicConfigurator;
4
import org.txm.importer.xtz.*
5
import org.txm.objects.Project
6
import org.txm.scripts.importer.xtz.*
7

  
8
import ims.tiger.index.writer.*
9
import ims.tiger.system.*
10

  
11
/**
 * XTZ import module whose importer step is a CONNLUImporter.
 */
class CONNLUImport extends XTZImport {

	CONNLUImport(Project params) {
		super(params)
	}

	/**
	 * Sets the steps of the import: a CONNLUImporter, then the XTZ compiler
	 * and the XTZ pager. No annotater step is set.
	 */
	@Override
	void init(Project p) {
		super.init(p)

		importer = new CONNLUImporter(this)
		compiler = new XTZCompiler(this)
		annotater = null
		pager = new XTZPager(this)
	}

	/**
	 * Runs the usual XTZ import, then prints a placeholder message when the
	 * import succeeded (no extra index is built yet).
	 */
	@Override
	void start() throws InterruptedException {
		super.start()

		if (!successFul) {
			return
		}
		println "BUILD CONNL-UD indexes ??"
	}
}
tmp/org.txm.connlu.core/groovy/org/txm/scripts/importer/connlu/package.html (revision 2112)
1
<html>
2
<body>
3
<p>CONNLU import module. This is a prototype that converts a CoNLL-U source file to XML files and then runs the XTZ import on them.</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.connlu.core/src/org/txm/connlu/core/InstallGroovyCONNLUFiles.java (revision 2112)
1
package org.txm.connlu.core;
2

  
3
import java.io.File;
4

  
5
import org.txm.PostTXMHOMEInstallationStep;
6
import org.txm.Toolbox;
7
import org.txm.objects.Workspace;
8
import org.txm.utils.BundleUtils;
9

  
10
public class InstallGroovyCONNLUFiles extends PostTXMHOMEInstallationStep {
11

  
12
	final static String createfolders[] = {"scripts/groovy/lib", "scripts/groovy/user", "scripts/groovy/system"}; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
13
	
14
	@Override
15
	public boolean do_install(Workspace workspace) {
16
		File txmhomedir = new File(Toolbox.getTxmHomePath());
17
		
18
		
19
		for (String folder : createfolders) {
20
			new File(txmhomedir, folder).mkdirs();
21
		}
22
		File scriptsDirectory = new File(txmhomedir, "scripts/groovy");
23
		File userDirectory = new File(scriptsDirectory, "user");
24
		File systemDirectory = new File(scriptsDirectory, "system");
25
		
26
		String bundle_id = "org.txm.connlu.core";
27
		File scriptsPackageDirectory = new File(userDirectory, "org/txm/scripts/importer");
28
		File scriptsPackageDirectory2 = new File(systemDirectory, "org/txm/scripts/importer");
29
		scriptsPackageDirectory.mkdirs();
30
		scriptsPackageDirectory2.mkdirs();
31
		BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/scripts/importer", "", scriptsPackageDirectory);
32
		BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/scripts/importer", "", scriptsPackageDirectory2);
33
	
34
		return scriptsDirectory.exists();
35
	}
36

  
37
	@Override
38
	public String getName() {
39
		return "CONNLU (org.txm.connlu.core)";
40
	}
41
	
42
	@Override
43
	public boolean needsReinstall(Workspace workspace) {
44
		File txmhomedir = new File(Toolbox.getTxmHomePath());
45
		for (String folder : createfolders) {
46
			if (!new File(txmhomedir, folder).exists()) {
47
				return true;
48
			}
49
		}
50
		return false;
51
	}
52
}
0 53

  
tmp/org.txm.connlu.core/build.properties (revision 2112)
1
source.. = src/
2
output.. = bin/
3
bin.includes = META-INF/,\
4
               .,\
5
               groovy/,\
6
               plugin.xml
7
qualifier=svn
0 8

  

Also available in: Unified diff