Révision 3120

tmp/org.txm.udpipe.feature/.project (revision 3120)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>org.txm.udpipe.feature</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.pde.FeatureBuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
	</buildSpec>
14
	<natures>
15
		<nature>org.eclipse.pde.FeatureNature</nature>
16
	</natures>
17
</projectDescription>
0 18

  
tmp/org.txm.udpipe.feature/build.properties (revision 3120)
1
bin.includes = feature.xml
0 2

  
tmp/org.txm.udpipe.feature/feature.xml (revision 3120)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<feature
3
      id="org.txm.udpipe.feature"
4
      label="org.txm.udpipe.feature"
5
      version="1.0.0.qualifier">
6

  
7
   <description url="http://www.example.com/description">
8
      [Enter Feature Description here.]
9
   </description>
10

  
11
   <copyright url="http://www.example.com/copyright">
12
      [Enter Copyright Description here.]
13
   </copyright>
14

  
15
   <license url="http://www.example.com/license">
16
      [Enter License Description here.]
17
   </license>
18

  
19
   <plugin
20
         id="org.txm.libs.udpipe"
21
         download-size="0"
22
         install-size="0"
23
         version="0.0.0"/>
24

  
25
   <plugin
26
         id="org.txm.udpipe.core"
27
         download-size="0"
28
         install-size="0"
29
         version="0.0.0"
30
         unpack="false"/>
31

  
32
   <plugin
33
         id="org.txm.udpipe.rcp"
34
         download-size="0"
35
         install-size="0"
36
         version="0.0.0"
37
         unpack="false"/>
38

  
39
</feature>
0 40

  
tmp/org.txm.udpipe.rcp/plugin.xml (revision 3120)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<?eclipse version="3.4"?>
3
<plugin>
4
   <extension
5
         point="org.eclipse.ui.preferencePages">
6
      <page
7
            category="org.txm.rcp.preferences.NLPPreferencePage"
8
            class="org.txm.udpipe.rcp.preferences.UDPipePreferencePage"
9
            id="org.txm.udpipe.rcp.preferences.UDPipePreferencePage"
10
            name="UDPipe">
11
      </page>
12
   </extension>
13

  
14
</plugin>
0 15

  
tmp/org.txm.udpipe.rcp/.settings/org.eclipse.jdt.core.prefs (revision 3120)
1
eclipse.preferences.version=1
2
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
4
org.eclipse.jdt.core.compiler.compliance=1.8
5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
6
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
7
org.eclipse.jdt.core.compiler.source=1.8
0 8

  
tmp/org.txm.udpipe.rcp/.classpath (revision 3120)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins">
5
		<accessrules>
6
			<accessrule kind="accessible" pattern="**"/>
7
		</accessrules>
8
	</classpathentry>
9
	<classpathentry kind="src" path="src"/>
10
	<classpathentry kind="output" path="bin"/>
11
</classpath>
0 12

  
tmp/org.txm.udpipe.rcp/META-INF/MANIFEST.MF (revision 3120)
1
Manifest-Version: 1.0
2
Bundle-ManifestVersion: 2
3
Bundle-Name: org.txm.udpipe.rcp
4
Bundle-SymbolicName: org.txm.udpipe.rcp;singleton:=true
5
Bundle-Version: 1.0.0.qualifier
6
Bundle-Vendor: textometrie.org
7
Automatic-Module-Name: org.txm.udpipe.rcp
8
BBundle-RequiredExecutionEnvironment: JavaSE-10
9
Require-Bundle: org.txm.udpipe.core;bundle-version="1.0.0";visibility:=reexport,
10
 org.txm.annotation.rcp;bundle-version="1.0.0";visibility:=reexport
0 11

  
tmp/org.txm.udpipe.rcp/.project (revision 3120)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>org.txm.udpipe.rcp</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.pde.PluginNature</nature>
26
		<nature>org.eclipse.jdt.core.javanature</nature>
27
	</natures>
28
</projectDescription>
0 29

  
tmp/org.txm.udpipe.rcp/src/org/txm/udpipe/rcp/preferences/UDPipePreferencePage.java (revision 3120)
1
// Copyright © 2010-2020 ENS de Lyon., University of Franche-Comté
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$ 
27
//
28
package org.txm.udpipe.rcp.preferences;
29

  
30
import org.eclipse.jface.preference.DirectoryFieldEditor;
31
import org.eclipse.ui.IWorkbench;
32
import org.txm.rcp.preferences.RCPPreferencesPage;
33
import org.txm.rcp.preferences.TXMPreferenceStore;
34
import org.txm.udpipe.core.UDPipePreferences;
35

  
36
/**
37
 * UDPipe preferences page.
38
 * 
39
 * @author mdecorde
40
 *
41
 */
42
public class UDPipePreferencePage extends RCPPreferencesPage {
43
	
44
	@Override
45
	public void createFieldEditors() {
46
		
47
		this.addField(new DirectoryFieldEditor(UDPipePreferences.MODELS_PATH, "Models directory", this.getFieldEditorParent()));
48
		
49
	}
50
	
51
	@Override
52
	public void init(IWorkbench workbench) {
53
		this.setPreferenceStore(new TXMPreferenceStore(UDPipePreferences.getInstance().getPreferencesNodeQualifier()));
54
	}
55
}
0 56

  
tmp/org.txm.udpipe.rcp/build.properties (revision 3120)
1
source.. = src/
2
output.. = bin/
3
bin.includes = META-INF/,\
4
               .,\
5
               plugin.xml
0 6

  
tmp/org.txm.libs.udpipe/.settings/org.eclipse.jdt.core.prefs (revision 3120)
1
eclipse.preferences.version=1
2
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
4
org.eclipse.jdt.core.compiler.compliance=1.8
5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
6
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
7
org.eclipse.jdt.core.compiler.source=1.8
0 8

  
tmp/org.txm.libs.udpipe/.classpath (revision 3120)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry exported="true" kind="lib" path="lib/hk2-api-2.6.1.jar"/>
4
	<classpathentry exported="true" kind="lib" path="lib/hk2-locator-2.6.1.jar"/>
5
	<classpathentry exported="true" kind="lib" path="lib/hk2-utils-2.6.1.jar"/>
6
	<classpathentry exported="true" kind="lib" path="lib/jakarta.ws.rs-api-2.1.6.jar"/>
7
	<classpathentry exported="true" kind="lib" path="lib/jersey-client.jar"/>
8
	<classpathentry exported="true" kind="lib" path="lib/jersey-common.jar"/>
9
	<classpathentry exported="true" kind="lib" path="lib/jersey-container-servlet-core.jar"/>
10
	<classpathentry exported="true" kind="lib" path="lib/jersey-container-servlet.jar"/>
11
	<classpathentry exported="true" kind="lib" path="lib/jersey-hk2.jar"/>
12
	<classpathentry exported="true" kind="lib" path="lib/jersey-media-jaxb.jar"/>
13
	<classpathentry exported="true" kind="lib" path="lib/jersey-media-json-binding.jar"/>
14
	<classpathentry exported="true" kind="lib" path="lib/jersey-media-sse.jar"/>
15
	<classpathentry exported="true" kind="lib" path="lib/jersey-server.jar"/>
16
	<classpathentry exported="true" kind="lib" path="lib/udpipe.jar"/>
17
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
18
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
19
	<classpathentry kind="src" path="src"/>
20
	<classpathentry kind="output" path="bin"/>
21
</classpath>
0 22

  
tmp/org.txm.libs.udpipe/META-INF/MANIFEST.MF (revision 3120)
1
Manifest-Version: 1.0
2
Bundle-ManifestVersion: 2
3
Bundle-Name: org.txm.libs.udpipe
4
Bundle-SymbolicName: org.txm.libs.udpipe
5
Bundle-Version: 1.0.0.qualifier
6
Bundle-Activator: org.txm.libs.udpipe.Activator
7
Require-Bundle: org.eclipse.core.runtime
8
BBundle-RequiredExecutionEnvironment: JavaSE-10
9
Automatic-Module-Name: org.txm.libs.udpipe
10
Bundle-ActivationPolicy: lazy
11
Bundle-ClassPath: lib/hk2-api-2.6.1.jar,
12
 lib/hk2-locator-2.6.1.jar,
13
 lib/hk2-utils-2.6.1.jar,
14
 lib/jakarta.ws.rs-api-2.1.6.jar,
15
 lib/jersey-client.jar,
16
 lib/jersey-common.jar,
17
 lib/jersey-container-servlet-core.jar,
18
 lib/jersey-container-servlet.jar,
19
 lib/jersey-hk2.jar,
20
 lib/jersey-media-jaxb.jar,
21
 lib/jersey-media-json-binding.jar,
22
 lib/jersey-media-sse.jar,
23
 lib/jersey-server.jar,
24
 lib/udpipe.jar,
25
 .
26
Export-Package: .,
27
 com.sun.research.ws.wadl,
28
 cz.cuni.mff.ufal.udpipe,
29
 javax.ws.rs,
30
 javax.ws.rs.client,
31
 javax.ws.rs.container,
32
 javax.ws.rs.core,
33
 javax.ws.rs.ext,
34
 javax.ws.rs.sse,
35
 jersey.repackaged.org.objectweb.asm,
36
 org.glassfish.hk2.api,
37
 org.glassfish.hk2.api.messaging,
38
 org.glassfish.hk2.extension,
39
 org.glassfish.hk2.internal,
40
 org.glassfish.hk2.utilities,
41
 org.glassfish.hk2.utilities.binding,
42
 org.glassfish.hk2.utilities.cache,
43
 org.glassfish.hk2.utilities.cache.internal,
44
 org.glassfish.hk2.utilities.general,
45
 org.glassfish.hk2.utilities.general.internal,
46
 org.glassfish.hk2.utilities.reflection,
47
 org.glassfish.hk2.utilities.reflection.internal,
48
 org.glassfish.jersey,
49
 org.glassfish.jersey.client,
50
 org.glassfish.jersey.client.authentication,
51
 org.glassfish.jersey.client.filter,
52
 org.glassfish.jersey.client.inject,
53
 org.glassfish.jersey.client.internal,
54
 org.glassfish.jersey.client.internal.inject,
55
 org.glassfish.jersey.client.internal.jdkconnector,
56
 org.glassfish.jersey.client.internal.routing,
57
 org.glassfish.jersey.client.spi,
58
 org.glassfish.jersey.inject.hk2,
59
 org.glassfish.jersey.internal,
60
 org.glassfish.jersey.internal.config,
61
 org.glassfish.jersey.internal.guava,
62
 org.glassfish.jersey.internal.inject,
63
 org.glassfish.jersey.internal.jsr166,
64
 org.glassfish.jersey.internal.l10n,
65
 org.glassfish.jersey.internal.routing,
66
 org.glassfish.jersey.internal.sonar,
67
 org.glassfish.jersey.internal.spi,
68
 org.glassfish.jersey.internal.util,
69
 org.glassfish.jersey.internal.util.collection,
70
 org.glassfish.jersey.jaxb.internal,
71
 org.glassfish.jersey.jsonb,
72
 org.glassfish.jersey.jsonb.internal,
73
 org.glassfish.jersey.logging,
74
 org.glassfish.jersey.media.sse,
75
 org.glassfish.jersey.media.sse.internal,
76
 org.glassfish.jersey.message,
77
 org.glassfish.jersey.message.internal,
78
 org.glassfish.jersey.model,
79
 org.glassfish.jersey.model.internal,
80
 org.glassfish.jersey.model.internal.spi,
81
 org.glassfish.jersey.process,
82
 org.glassfish.jersey.process.internal,
83
 org.glassfish.jersey.server,
84
 org.glassfish.jersey.server.filter,
85
 org.glassfish.jersey.server.filter.internal,
86
 org.glassfish.jersey.server.internal,
87
 org.glassfish.jersey.server.internal.inject,
88
 org.glassfish.jersey.server.internal.monitoring,
89
 org.glassfish.jersey.server.internal.monitoring.core,
90
 org.glassfish.jersey.server.internal.monitoring.jmx,
91
 org.glassfish.jersey.server.internal.process,
92
 org.glassfish.jersey.server.internal.routing,
93
 org.glassfish.jersey.server.internal.scanning,
94
 org.glassfish.jersey.server.internal.sonar,
95
 org.glassfish.jersey.server.model,
96
 org.glassfish.jersey.server.model.internal,
97
 org.glassfish.jersey.server.monitoring,
98
 org.glassfish.jersey.server.spi,
99
 org.glassfish.jersey.server.spi.internal,
100
 org.glassfish.jersey.server.wadl,
101
 org.glassfish.jersey.server.wadl.config,
102
 org.glassfish.jersey.server.wadl.internal,
103
 org.glassfish.jersey.server.wadl.internal.generators,
104
 org.glassfish.jersey.server.wadl.internal.generators.resourcedoc,
105
 org.glassfish.jersey.server.wadl.internal.generators.resourcedoc.model,
106
 org.glassfish.jersey.server.wadl.internal.generators.resourcedoc.xhtml,
107
 org.glassfish.jersey.server.wadl.processor,
108
 org.glassfish.jersey.servlet,
109
 org.glassfish.jersey.servlet.async,
110
 org.glassfish.jersey.servlet.init,
111
 org.glassfish.jersey.servlet.init.internal,
112
 org.glassfish.jersey.servlet.internal,
113
 org.glassfish.jersey.servlet.internal.spi,
114
 org.glassfish.jersey.servlet.spi,
115
 org.glassfish.jersey.spi,
116
 org.glassfish.jersey.uri,
117
 org.glassfish.jersey.uri.internal,
118
 org.jvnet.hk2.annotations,
119
 org.jvnet.hk2.component,
120
 org.jvnet.hk2.external.generator,
121
 org.jvnet.hk2.external.runtime,
122
 org.jvnet.hk2.internal,
123
 org.txm.libs.udpipe
0 124

  
tmp/org.txm.libs.udpipe/.project (revision 3120)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>org.txm.libs.udpipe</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.pde.PluginNature</nature>
26
		<nature>org.eclipse.jdt.core.javanature</nature>
27
	</natures>
28
</projectDescription>
0 29

  
tmp/org.txm.libs.udpipe/src/org/txm/libs/udpipe/Activator.java (revision 3120)
1
package org.txm.libs.udpipe;
2

  
3
import org.osgi.framework.BundleActivator;
4
import org.osgi.framework.BundleContext;
5

  
6
public class Activator implements BundleActivator {
7
	
8
	private static BundleContext context;
9
	
10
	public static BundleContext getContext() {
11
		return context;
12
	}
13
	
14
	/*
15
	 * (non-Javadoc)
16
	 * @see org.osgi.framework.BundleActivator#start(org.osgi.framework.BundleContext)
17
	 */
18
	@Override
19
	public void start(BundleContext bundleContext) throws Exception {
20
		Activator.context = bundleContext;
21
	}
22
	
23
	/*
24
	 * (non-Javadoc)
25
	 * @see org.osgi.framework.BundleActivator#stop(org.osgi.framework.BundleContext)
26
	 */
27
	@Override
28
	public void stop(BundleContext bundleContext) throws Exception {
29
		Activator.context = null;
30
	}
31
	
32
}
0 33

  
tmp/org.txm.libs.udpipe/build.properties (revision 3120)
1
source.. = src/
2
output.. = bin/
3
bin.includes = META-INF/,\
4
               .,\
5
               lib/
0 6

  
tmp/org.txm.udpipe.core/src/org/txm/udpipe/core/UDPipeClientUtils.java (revision 3120)
1
package org.txm.udpipe.core;
2

  
3
import java.io.File;
4
import java.io.IOException;
5

  
6
import javax.ws.rs.client.Entity;
7
import javax.ws.rs.client.WebTarget;
8
import javax.ws.rs.core.Form;
9
import javax.ws.rs.core.MediaType;
10
import javax.xml.stream.XMLStreamException;
11

  
12
import org.json.JSONArray;
13
import org.json.JSONException;
14
import org.json.JSONObject;
15
import org.txm.importer.StaxIdentityParser;
16
import org.txm.utils.io.IOUtils;
17

  
18
public class UDPipeClientUtils {
19
	
20
	public static void printAll(WebTarget target, String string) {
21
		String r = target.path("models").request().accept(MediaType.TEXT_XML).get(String.class);
22
		System.out.println(r);
23
		
24
		JSONObject obj;
25
		try {
26
			JSONObject models = new JSONObject(r);
27
			JSONArray names = models.getJSONObject("models").names();
28
			for (int i = 0; i < names.length(); i++) {
29
				System.out.println(names.get(i));
30
				
31
				System.out.println(process(target, names.get(i).toString(), string));
32
			}
33
		}
34
		catch (JSONException e) {
35
			// TODO Auto-generated catch block
36
			e.printStackTrace();
37
		}
38
	}
39
	
40
	public static String process(WebTarget target, String model, String text) throws JSONException {
41
		Form form = new Form();
42
		form.param("data", text);
43
		form.param("model", model);
44
		form.param("tokenizer", "");
45
		form.param("tagger", "");
46
		form.param("parser", "");
47
		
48
		String r = target.path("process").request().accept(MediaType.TEXT_XML).post(Entity.entity(form, MediaType.APPLICATION_FORM_URLENCODED_TYPE), String.class);
49
		// System.out.println(r.toString());
50
		JSONObject obj;
51
		obj = new JSONObject(r);
52
		
53
		return obj.getString("result");
54
	}
55
	
56
	
57
	public static boolean processXMLTEI(final WebTarget target, final String model, File xmlFile, File outFile) throws JSONException, IOException, XMLStreamException {
58
		
59
		StaxIdentityParser parser = new StaxIdentityParser(xmlFile) {
60
			
61
			StringBuilder toProcess = new StringBuilder();
62
			
63
			boolean doProcess = false;
64
			
65
			int s_n = 1;
66
			
67
			@Override
68
			protected void processCharacters() throws XMLStreamException {
69
				if (doProcess) {
70
					String text = parser.getText();
71
					toProcess.append(text);
72
				}
73
				else {
74
					super.processCharacters();
75
				}
76
			}
77
			
78
			@Override
79
			protected void processStartElement() throws XMLStreamException, IOException {
80
				tokenizeAndWrite();
81
				super.processStartElement();
82
				
83
				if (parser.getLocalName().equals("text")) doProcess = true;
84
			}
85
			
86
			@Override
87
			protected void processEndElement() throws XMLStreamException {
88
				tokenizeAndWrite();
89
				super.processEndElement();
90
				
91
				if (parser.getLocalName().equals("text")) doProcess = false;
92
			}
93
			
94
			protected void tokenizeAndWrite() throws XMLStreamException {
95
				if (!doProcess) {
96
					toProcess.setLength(0);
97
					return;
98
				}
99
				if (toProcess.length() == 0) {
100
					toProcess.setLength(0);
101
					return;
102
				}
103
				String str = toProcess.toString().trim();
104
				if (str.length() == 0) {
105
					toProcess.setLength(0);
106
					return;
107
				}
108
				try {
109
					String connlu = UDPipeClientUtils.process(target, model, str);
110
					// System.out.println(connlu);
111
					
112
					String lines[] = connlu.split("\n");
113
					boolean sopen = false;
114
					for (String line : lines) {
115
						// System.out.println(line);
116
						
117
						if (line.startsWith("# newdoc")) {
118
							
119
						}
120
						else if (line.startsWith("# newpar")) {
121
							
122
						}
123
						else if (line.startsWith("# text")) {
124
							
125
						}
126
						else if (line.startsWith("# sent_id")) {
127
							if (sopen) {
128
								writer.writeEndElement(); // s
129
								writer.writeCharacters("\n");
130
							}
131
							writer.writeStartElement("s");
132
							writer.writeAttribute("n", "" + s_n++);
133
							writer.writeCharacters("\n");
134
							sopen = true;
135
						}
136
						else {
137
							
138
							String[] cols = line.split("\t");
139
							if (cols.length <= 5) continue;
140
							
141
							writer.writeStartElement("w");
142
							writer.writeAttribute("n", cols[0]);
143
							writer.writeAttribute(model + "-lemma", cols[2]);
144
							writer.writeAttribute(model + "-pos", cols[3]);
145
							writer.writeAttribute(model + "-syntax", cols[7]);
146
							writer.writeCharacters(cols[1]);
147
							writer.writeEndElement();
148
							writer.writeCharacters("\n");
149
						}
150
					}
151
					
152
					if (sopen) {
153
						writer.writeEndElement(); // s
154
						writer.writeCharacters("\n");
155
					}
156
					toProcess.setLength(0);
157
				}
158
				catch (JSONException e) {
159
					// TODO Auto-generated catch block
160
					e.printStackTrace();
161
				}
162
			}
163
		};
164
		
165
		return parser.process(outFile);
166
	}
167
	
168
	public static boolean processXML(final WebTarget target, final String model, File xmlFile) throws JSONException, IOException, XMLStreamException {
169
		
170
		StaxIdentityParser parser = new StaxIdentityParser(xmlFile) {
171
			
172
			StringBuilder toProcess = new StringBuilder();
173
			
174
			@Override
175
			protected void processCharacters() throws XMLStreamException {
176
				String text = parser.getText();
177
				toProcess.append(text);
178
			}
179
			
180
			@Override
181
			protected void processStartElement() throws XMLStreamException, IOException {
182
				tokenizeAndWrite();
183
				super.processStartElement();
184
			}
185
			
186
			@Override
187
			protected void processEndElement() throws XMLStreamException {
188
				tokenizeAndWrite();
189
				super.processEndElement();
190
			}
191
			
192
			protected void tokenizeAndWrite() throws XMLStreamException {
193
				if (toProcess.length() == 0) {
194
					toProcess.setLength(0);
195
					return;
196
				}
197
				String str = toProcess.toString().trim();
198
				if (str.length() == 0) {
199
					toProcess.setLength(0);
200
					return;
201
				}
202
				try {
203
					String connlu = UDPipeClientUtils.process(target, model, str);
204
					// System.out.println(connlu);
205
					
206
					String lines[] = connlu.split("\n");
207
					boolean sopen = false;
208
					for (String line : lines) {
209
						// System.out.println(line);
210
						
211
						if (line.startsWith("# newdoc")) {
212
							
213
						}
214
						else if (line.startsWith("# newpar")) {
215
							
216
						}
217
						else if (line.startsWith("# text")) {
218
							
219
						}
220
						else if (line.startsWith("# sent_id")) {
221
							if (sopen) {
222
								writer.writeEndElement(); // s
223
								writer.writeCharacters("\n");
224
							}
225
							writer.writeStartElement("s");
226
							writer.writeAttribute("n", line.substring("# sent_id".length() + 1));
227
							writer.writeCharacters("\n");
228
							sopen = true;
229
						}
230
						else {
231
							
232
							String[] cols = line.split("\t");
233
							if (cols.length <= 5) continue;
234
							
235
							writer.writeStartElement("w");
236
							writer.writeAttribute("n", cols[0]);
237
							writer.writeAttribute(model + "lemma", cols[2]);
238
							writer.writeAttribute(model + "pos", cols[3]);
239
							writer.writeAttribute(model + "syntax", cols[7]);
240
							writer.writeCharacters(cols[1]);
241
							writer.writeEndElement();
242
							writer.writeCharacters("\n");
243
						}
244
					}
245
					
246
					if (sopen) {
247
						writer.writeEndElement(); // s
248
						writer.writeCharacters("\n");
249
					}
250
					toProcess.setLength(0);
251
				}
252
				catch (JSONException e) {
253
					// TODO Auto-generated catch block
254
					e.printStackTrace();
255
				}
256
			}
257
		};
258
		
259
		return parser.process(new File(xmlFile.getParentFile(), "out.xml"));
260
	}
261
	
262
	public static void processTXT(WebTarget target, String model, File txtFile) throws JSONException, IOException {
263
		Form form = new Form();
264
		form.param("data", IOUtils.getText(txtFile));
265
		form.param("model", model);
266
		form.param("tokenizer", "");
267
		form.param("tagger", "");
268
		form.param("parser", "");
269
		
270
		String r = target.path("process").request().accept(MediaType.TEXT_XML).post(Entity.entity(form, MediaType.APPLICATION_FORM_URLENCODED_TYPE), String.class);
271
		// System.out.println(r.toString());
272
		JSONObject obj;
273
		obj = new JSONObject(r);
274
		String result = obj.getString("result");
275
		System.out.println(result);
276
	}
277
}
0 278

  
tmp/org.txm.udpipe.core/src/org/txm/udpipe/core/UDPipeProcessor.java (revision 3120)
1
package org.txm.udpipe.core;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.net.URI;
6
import java.util.ArrayList;
7
import java.util.Arrays;
8
import java.util.List;
9

  
10
import javax.ws.rs.client.Client;
11
import javax.ws.rs.client.ClientBuilder;
12
import javax.ws.rs.client.WebTarget;
13
import javax.ws.rs.core.UriBuilder;
14
import javax.xml.stream.XMLStreamException;
15

  
16
import org.apache.poi.util.StringUtil;
17
import org.glassfish.jersey.client.ClientConfig;
18
import org.json.JSONException;
19
import org.txm.utils.io.IOUtils;
20
import org.txm.xml.DOMIdentityHook;
21
import org.txm.xml.LocalNamesHookActivator;
22
import org.txm.xml.SimpleHook;
23
import org.txm.xml.XMLParser;
24
import org.txm.xml.XMLProcessor;
25
import org.txm.xml.XPathsHookActivator;
26

  
27
public class UDPipeProcessor {
28
	
29
	// REST client objects, created in the constructor
30
	private ClientConfig config;
31
	
32
	private Client client;
33
	
34
	private WebTarget target;
35
	
36
	// 1- prepare source (filled by buildSRC)
37
	StringBuilder srcContent = new StringBuilder(); // text blocks appended by buildSRC, read by apply
38
	
39
	ArrayList<String> blockPaths = new ArrayList<>(); // path of the element starting each block, read by inject
40
	
41
	// 2- store result (set by apply)
42
	private String[] newpars;
43
	
44
	// 3- reinject result (see inject)
45
	
46
	/**
	 * Creates a processor targeting the default server URI returned by getBaseURI().
	 */
	public UDPipeProcessor() {
		this(getBaseURI());
	}
	
	/**
	 * Creates a processor targeting the given server URI.
	 *
	 * @param adress the base URI of the UDPipe server; when null, the default URI
	 *            returned by getBaseURI() is used
	 */
	public UDPipeProcessor(URI adress) {
		config = new ClientConfig();
		client = ClientBuilder.newClient(config);
		// use the caller's address; the default is only a fallback
		target = client.target(adress != null ? adress : getBaseURI());
	}
55
	
56
	public WebTarget getTarget() {
57
		return target;
58
	}
59
	
60
	public boolean buildSRC(File xmlFile, List<String> limits) throws IOException, XMLStreamException {
61
		XMLParser blocksConstructor = new XMLParser(xmlFile);
62
		
63
		LocalNamesHookActivator activator = new LocalNamesHookActivator(null, limits);
64
		
65
		new SimpleHook<XMLParser>("src builder", activator, blocksConstructor) {
66
			
67
			StringBuilder toProcess = new StringBuilder(); // temporally stores text to build blocks
68
			
69
			private String startingPath;
70
			
71
			@Override
72
			protected void processStartElement() {}
73
			
74
			@Override
75
			protected void processEndElement() {}
76
			
77
			@Override
78
			protected void processCharacters() {
79
				String text = this.parser.getText();
80
				toProcess.append(text);
81
			}
82
			
83
			@Override
84
			public boolean deactivate() {
85
				
86
				if (toProcess.length() == 0) {
87
					toProcess.setLength(0);
88
					return true; // nothing was done
89
				}
90
				String str = toProcess.toString().trim();
91
				if (str.length() == 0) {
92
					toProcess.setLength(0);
93
					return true; // really nothing was done
94
				}
95
				
96
				blockPaths.add(startingPath);
97
				srcContent.append(str.replaceAll("[\n]++", " ") + "\n\n"); // add content
98
				
99
				toProcess.setLength(0); // reset the buffer
100
				return true;
101
			}
102
			
103
			@Override
104
			public boolean _activate() {
105
				startingPath = parentParser.getCurrentPath();
106
				return true;
107
			}
108
		};
109
		
110
		if (blocksConstructor.process()) {
111
			System.out.println("N blocks: " + blockPaths.size());
112
			IOUtils.write(new File("/home/mdecorde/TEMP/src.txt"), srcContent.toString());
113
			return true;
114
		}
115
		else {
116
			return false;
117
		}
118
	}
119
	
120
	public boolean apply(String model) throws JSONException {
121
		System.out.println("APPLYING UDPIPE...");
122
		String connlu = UDPipeJavaUtils.toConnluString(new File(model), srcContent.toString());
123
		
124
		try {
125
			IOUtils.write(new File("/home/mdecorde/TEMP/applyraw.txt"), connlu);
126
		}
127
		catch (Exception e) {
128
			// TODO Auto-generated catch block
129
			e.printStackTrace();
130
		}
131
		
132
		newpars = connlu.split("# newpar");
133
		System.out.println("N newpars: " + newpars.length);
134
		
135
		String[] newpars2 = new String[newpars.length - 1];
136
		System.arraycopy(newpars, 1, newpars2, 0, newpars.length - 1); // the first newpar is empty (contains only #newdoc)
137
		newpars = newpars2;
138
		System.out.println("N newpars: " + newpars.length);
139
		try {
140
			IOUtils.write(new File("/home/mdecorde/TEMP/apply.txt"), StringUtil.join(newpars, "\n\n\n"));
141
			IOUtils.write(new File("/home/mdecorde/TEMP/apply0.txt"), newpars[0]);
142
			IOUtils.write(new File("/home/mdecorde/TEMP/apply1.txt"), newpars[newpars.length - 1]);
143
		}
144
		catch (Exception e) {
145
			// TODO Auto-generated catch block
146
			e.printStackTrace();
147
		}
148
		return true;
149
	}
150
	
151
	// Arrays.asList("text", "div", "ab", "p", "lg")
152
	public boolean inject(File xmlFile, File outFile) throws JSONException, IOException, XMLStreamException {
153
		
154
		XMLProcessor injector = new XMLProcessor(xmlFile);
155
		XPathsHookActivator activator = new XPathsHookActivator(null, blockPaths);
156
		DOMIdentityHook hook = new DOMIdentityHook("injector", activator, injector) {
157
			
158
			int n = 0;
159
			
160
			@Override
161
			public void processDom() {
162
				// System.out.println("" + n + ": injecting " + newpars[n++] + " in words");
163
			}
164
		};
165
		
166
		return injector.process(outFile);
167
	}
168
	// System.out.println("INJECTING ANNOTATIONS...");
169
	// StaxIdentityParser finalParser = new StaxIdentityParser(xmlFile) {
170
	//
171
	// StringBuilder toProcess = new StringBuilder();
172
	//
173
	// boolean doProcess = false;
174
	//
175
	// int n = 1;
176
	//
177
	// int s_n = 1;
178
	//
179
	// @Override
180
	// protected void processCharacters() throws XMLStreamException {
181
	// if (doProcess) {
182
	// String text = parser.getText();
183
	// toProcess.append(text);
184
	// }
185
	// else {
186
	// super.processCharacters();
187
	// }
188
	// }
189
	//
190
	// @Override
191
	// protected void processStartElement() throws XMLStreamException, IOException {
192
	// if (divTags == null || divTags.size() == 0 || divTags.contains(parser.getLocalName())) tokenizeAndWrite();
193
	// super.processStartElement();
194
	//
195
	// if (parser.getLocalName().equals("text")) doProcess = true;
196
	// }
197
	//
198
	// @Override
199
	// protected void processEndElement() throws XMLStreamException {
200
	// if (divTags == null || divTags.size() == 0 || divTags.contains(parser.getLocalName())) tokenizeAndWrite();
201
	// super.processEndElement();
202
	//
203
	// if (parser.getLocalName().equals("text")) doProcess = false;
204
	// }
205
	//
206
	// protected void tokenizeAndWrite() throws XMLStreamException {
207
	// if (!doProcess) {
208
	// toProcess.setLength(0);
209
	// return;
210
	// }
211
	// if (toProcess.length() == 0) {
212
	// toProcess.setLength(0);
213
	// return;
214
	// }
215
	// String str = toProcess.toString().trim();
216
	// if (str.length() == 0) {
217
	// toProcess.setLength(0);
218
	// return;
219
	// }
220
	//
221
	// String connlu = newpars[n++];
222
	// // System.out.println(connlu);
223
	//
224
	// String lines[] = connlu.split("\n");
225
	// boolean sopen = false;
226
	// for (String line : lines) {
227
	// // System.out.println(line);
228
	//
229
	// if (line.startsWith("# newdoc")) {
230
	//
231
	// }
232
	// else if (line.startsWith("# newpar")) {
233
	//
234
	// }
235
	// else if (line.startsWith("# text")) {
236
	//
237
	// }
238
	// else if (line.startsWith("# sent_id")) {
239
	// if (sopen) {
240
	// writer.writeEndElement(); // s
241
	// writer.writeCharacters("\n");
242
	// }
243
	// writer.writeStartElement("s");
244
	// writer.writeAttribute("n", "" + s_n++);
245
	// writer.writeCharacters("\n");
246
	// sopen = true;
247
	// }
248
	// else {
249
	// String[] cols = line.split("\t");
250
	// if (cols.length <= 5) continue;
251
	//
252
	// writer.writeStartElement("w");
253
	// writer.writeAttribute("n", cols[0]);
254
	// writer.writeAttribute(model + "-lemma", cols[2]);
255
	// writer.writeAttribute(model + "-pos", cols[3]);
256
	// writer.writeAttribute(model + "-syntax", cols[7]);
257
	// writer.writeCharacters(cols[1]);
258
	// writer.writeEndElement();
259
	// writer.writeCharacters("\n");
260
	// }
261
	// }
262
	//
263
	// if (sopen) {
264
	// writer.writeEndElement(); // s
265
	// writer.writeCharacters("\n");
266
	// }
267
	// toProcess.setLength(0);
268
	// }
269
	// };
270
	//
271
	// return finalParser.process(outFile);
272
	// }
273
	
274
	public static void main(String[] args) throws JSONException, IOException, XMLStreamException {
275
		
276
		File i = new File("/home/mdecorde/xml/tdm80j/tdm80j.xml");
277
		File o = new File("/home/mdecorde/xml/tdm80j/tdm80j-out.xml");
278
		
279
		UDPipeProcessor upp = new UDPipeProcessor();
280
		
281
		// UDPipeUtils.printAll(upp.getTarget(), "un test");
282
		
283
		System.out.println("PREPARE: " + upp.buildSRC(i, Arrays.asList("div", "ab", "p", "lg")));
284
		
285
		System.out.println("APPLY: " + upp.apply("fr-sequoia"));
286
		
287
		System.out.println("INJECTION: " + upp.inject(i, o));
288
		
289
		// System.out.println(process(target, "fr-gsd", "un test simple"));
290
		
291
		// printAll(target, "un test simple");
292
		
293
		// processTXT(target, "fr-sequoia", new File("/home/mdecorde/xml/voeuxtxt/0001.txt"));
294
		
295
		// long time = System.currentTimeMillis();
296
		// processXMLTEI(target, "fr-sequoia", new File("/home/mdecorde/xml/tdm80j/tdm80j.xml"), new File("/home/mdecorde/xml/tdm80j/tdm80j-out.xml"));
297
		// System.out.println("TIME1= " + (System.currentTimeMillis() - time));
298
		//
299
		// long time2 = System.currentTimeMillis();
300
		// processXMLTEIOpti(target, "fr-sequoia", new File("/home/mdecorde/xml/tdm80j/tdm80j.xml"), new File("/home/mdecorde/xml/tdm80j/tdm80j-out-opti.xml"));
301
		// System.out.println("TIME2= " + (System.currentTimeMillis() - time2));
302
	}
303
	
304
	/**
305
	 * to use if udpipe server was started with :
306
	 * ./udpipe_server 7878 fr-sequoia fr-sequoia french-sequoia-ud-2.4-190531.udpipe ""
307
	 * 
308
	 * @return default UDPipe URL
309
	 */
310
	private static URI getBaseURI() {
311
		return UriBuilder.fromUri("http://localhost:7878").build();
312
	}
313
}
0 314

  
tmp/org.txm.udpipe.core/src/org/txm/udpipe/core/UDPipePreferences.java (revision 3120)
1
package org.txm.udpipe.core;
2

  
3

  
4
import java.io.BufferedReader;
5
import java.io.File;
6
import java.io.IOException;
7
import java.util.Properties;
8

  
9
import org.osgi.framework.Version;
10
import org.osgi.service.prefs.Preferences;
11
import org.txm.Toolbox;
12
import org.txm.core.preferences.TXMPreferences;
13
import org.txm.utils.BundleUtils;
14
import org.txm.utils.io.FileCopy;
15
import org.txm.utils.io.IOUtils;
16
import org.txm.utils.logger.Log;
17

  
18
/**
19
 * UDPipe preferences initializer.
20
 * 
21
 * @author mdecorde
22
 *
23
 */
24
public class UDPipePreferences extends TXMPreferences {
25
	
26
	/**
27
	 * contains the last bundle version setting the UDPipe models directory
28
	 */
29
	public static final String INSTALLED_MODELS_VERSION = "installed_models_version"; //$NON-NLS-1$
30
	
31
	/**
32
	 * Models path.
33
	 */
34
	public static final String MODELS_PATH = "models_path"; //$NON-NLS-1$
35
	
36
	/**
37
	 * Gets the instance.
38
	 * 
39
	 * @return the instance
40
	 */
41
	public static TXMPreferences getInstance() {
42
		if (!TXMPreferences.instances.containsKey(UDPipePreferences.class)) {
43
			new UDPipePreferences();
44
		}
45
		return TXMPreferences.instances.get(UDPipePreferences.class);
46
	}
47
	
48
	
49
	@Override
50
	public void initializeDefaultPreferences() {
51
		super.initializeDefaultPreferences();
52
		
53
		// FIXME: SJ: some code in this method should only be done at first run or update (eg. modifying chmod, etc.)
54
		
55
		// Default preferences if no org.txm.udpipe.core fragment is found
56
		Preferences preferences = this.getDefaultPreferencesNode();
57
		
58
		// FIXME: SJ: became useless since we embed it in fragments?
59
		String installPath = "/usr/lib/UDPipe"; // "System.getProperty("osgi.user.area") + "/TXM/udpipe"; //$NON-NLS-1$
60
		if (System.getProperty("os.name").contains("Windows")) {
61
			installPath = "C:/Program Files/UDPipe";
62
		}
63
		else if (System.getProperty("os.name").contains("Mac")) {
64
			installPath = "/Applications/UDPipe";
65
		}
66
		
67
		preferences.put(MODELS_PATH, installPath + "/models"); //$NON-NLS-1$
68
		
69
		// FIXME: need to validate this code + need to check if it's still useful
70
		String mversion = UDPipePreferences.getInstance().getString(INSTALLED_MODELS_VERSION);
71
		
72
		// if TXM is launched for the first time bversion and mversion values are empty
73
		if (mversion == null || mversion.equals("")) {
74
			
75
			// Restore previous TreeTagger preferences
76
			File previousPreferenceFile = new File(System.getProperty("java.io.tmpdir"), "org.txm.rcp.prefs"); //$NON-NLS-1$ //$NON-NLS-2$
77
			
78
			if (System.getProperty("os.name").indexOf("Mac") >= 0) { //$NON-NLS-1$ //$NON-NLS-2$
79
				previousPreferenceFile = new File("/tmp/org.txm.rcp.prefs"); //$NON-NLS-1$
80
			}
81
			
82
			if (previousPreferenceFile.exists()) {
83
				try {
84
					System.out.println("Restoring preferences (from " + previousPreferenceFile + ")."); //$NON-NLS-1$ //$NON-NLS-2$
85
					Properties previousProperties = new Properties();
86
					BufferedReader reader = IOUtils.getReader(previousPreferenceFile, "ISO-8859-1"); //$NON-NLS-1$
87
					previousProperties.load(reader);
88
					
89
					String[] keys = { MODELS_PATH };
90
					for (String k : keys) {
91
						if (previousProperties.getProperty(previousProperties.getProperty(k)) != null) {
92
							preferences.put(k, installPath);
93
							UDPipePreferences.getInstance().put(k, previousProperties.getProperty(k));
94
						}
95
					}
96
				}
97
				catch (Exception e) {
98
					e.printStackTrace();
99
				}
100
			}
101
			
102
			UDPipePreferences.getInstance().put(INSTALLED_MODELS_VERSION, "0.0.0");
103
			
104
			mversion = UDPipePreferences.getInstance().getString(INSTALLED_MODELS_VERSION);
105
		}
106
		
107
		// look for org.txm.udpipe.core.<osname> fragment
108
		String mfragmentid = "org.txm.udpipe.core.models";
109
		String bfragmentid = "org.txm.udpipe.core";
110
		String osname = System.getProperty("os.name").toLowerCase();
111
		if (osname.contains("windows")) {
112
			osname = "win32";
113
		}
114
		else if (osname.contains("mac os x")) {
115
			osname = "macosx";
116
		}
117
		else {
118
			osname = "linux";
119
		}
120
		bfragmentid += "." + osname;
121
		
122
		Version currentMVersion = new Version(mversion);
123
		
124
		Version modelsFragmentVersion = BundleUtils.getBundleVersion(mfragmentid);
125
		if (modelsFragmentVersion != null && modelsFragmentVersion.compareTo(currentMVersion) >= 0) { // udpate models path!
126
			Log.fine("Updating TreeTagger models path..."); //$NON-NLS-1$
127
			File path = BundleUtils.getBundleFile(mfragmentid);
128
			
129
			File installModelsDir = new File(path, "res/models"); //$NON-NLS-1$
130
			File modelsDir = new File(Toolbox.getTxmHomePath(), "udpipe-models"); //$NON-NLS-1$
131
			modelsDir.mkdirs();
132
			try {
133
				FileCopy.copyFiles(installModelsDir, modelsDir);
134
				preferences.put(MODELS_PATH, modelsDir.getAbsolutePath());
135
				UDPipePreferences.getInstance().put(INSTALLED_MODELS_VERSION, modelsFragmentVersion.toString());
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff