Révision 3315
TXM/trunk/org.txm.conllu.rcp/.project (revision 3315) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<projectDescription> |
|
3 |
<name>org.txm.conllu.rcp</name> |
|
4 |
<comment></comment> |
|
5 |
<projects> |
|
6 |
</projects> |
|
7 |
<buildSpec> |
|
8 |
<buildCommand> |
|
9 |
<name>org.eclipse.jdt.core.javabuilder</name> |
|
10 |
<arguments> |
|
11 |
</arguments> |
|
12 |
</buildCommand> |
|
13 |
<buildCommand> |
|
14 |
<name>org.eclipse.pde.ManifestBuilder</name> |
|
15 |
<arguments> |
|
16 |
</arguments> |
|
17 |
</buildCommand> |
|
18 |
<buildCommand> |
|
19 |
<name>org.eclipse.pde.SchemaBuilder</name> |
|
20 |
<arguments> |
|
21 |
</arguments> |
|
22 |
</buildCommand> |
|
23 |
</buildSpec> |
|
24 |
<natures> |
|
25 |
<nature>org.eclipse.pde.PluginNature</nature> |
|
26 |
<nature>org.eclipse.jdt.core.javanature</nature> |
|
27 |
</natures> |
|
28 |
</projectDescription> |
|
0 | 29 |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/importsection/CoNLLUSection.java (revision 3315) | ||
---|---|---|
1 |
package org.txm.conllu.rcp.importsection; |
|
2 |
|
|
3 |
import org.eclipse.swt.SWT; |
|
4 |
import org.eclipse.swt.widgets.Button; |
|
5 |
import org.eclipse.swt.widgets.Composite; |
|
6 |
import org.eclipse.swt.widgets.Label; |
|
7 |
import org.eclipse.swt.widgets.Text; |
|
8 |
import org.eclipse.ui.forms.events.ExpansionAdapter; |
|
9 |
import org.eclipse.ui.forms.events.ExpansionEvent; |
|
10 |
import org.eclipse.ui.forms.widgets.FormToolkit; |
|
11 |
import org.eclipse.ui.forms.widgets.ScrolledForm; |
|
12 |
import org.eclipse.ui.forms.widgets.TableWrapData; |
|
13 |
import org.eclipse.ui.forms.widgets.TableWrapLayout; |
|
14 |
import org.osgi.service.prefs.Preferences; |
|
15 |
import org.txm.conllu.core.preferences.UDPreferences; |
|
16 |
import org.txm.objects.Project; |
|
17 |
import org.txm.rcp.editors.imports.sections.ImportEditorSection; |
|
18 |
|
|
19 |
public class CoNLLUSection extends ImportEditorSection { |
|
20 |
|
|
21 |
String ID = CoNLLUSection.class.getSimpleName(); |
|
22 |
|
|
23 |
private static final int SECTION_SIZE = 1; |
|
24 |
|
|
25 |
Button useNewDocIdButton; |
|
26 |
Button keepWordContractionsButton; |
|
27 |
Text udPropertiesPrefixButton; |
|
28 |
private Text headPropertiesText; |
|
29 |
private Text depsPropertiesText; |
|
30 |
|
|
31 |
/** |
|
32 |
* |
|
33 |
* @param toolkit2 |
|
34 |
* @param form2 |
|
35 |
* @param parent |
|
36 |
* @param stylesave |
|
37 |
* |
|
38 |
* @param moduleParams |
|
39 |
* @param scriptName temporary parameter to detect if import module is xtzLoader.groovy |
|
40 |
*/ |
|
41 |
public CoNLLUSection(FormToolkit toolkit2, ScrolledForm form2, Composite parent, int style) { |
|
42 |
|
|
43 |
super(toolkit2, form2, parent, style, "CoNLL-U"); |
|
44 |
|
|
45 |
TableWrapLayout layout = new TableWrapLayout(); |
|
46 |
layout.makeColumnsEqualWidth = true; |
|
47 |
layout.numColumns = 1; |
|
48 |
this.section.setLayout(layout); |
|
49 |
this.section.setLayoutData(getSectionGridData(SECTION_SIZE)); |
|
50 |
this.section.setEnabled(false); |
|
51 |
|
|
52 |
this.section.addExpansionListener(new ExpansionAdapter() { |
|
53 |
|
|
54 |
@Override |
|
55 |
public void expansionStateChanged(ExpansionEvent e) { |
|
56 |
form.layout(true); |
|
57 |
} |
|
58 |
}); |
|
59 |
|
|
60 |
Composite sectionClient = toolkit.createComposite(this.section); |
|
61 |
TableWrapLayout slayout = new TableWrapLayout(); |
|
62 |
slayout.numColumns = 4; |
|
63 |
sectionClient.setLayout(slayout); |
|
64 |
this.section.setClient(sectionClient); |
|
65 |
|
|
66 |
useNewDocIdButton = toolkit.createButton(sectionClient, "Use new doc id when importing CoNLL-U files", SWT.CHECK); |
|
67 |
TableWrapData gdata2 = getButtonLayoutData(); |
|
68 |
gdata2.colspan = 4; // one line |
|
69 |
useNewDocIdButton.setLayoutData(gdata2); |
|
70 |
|
|
71 |
keepWordContractionsButton = toolkit.createButton(sectionClient, "Keep word contractions when importing CoNLL-U files", SWT.CHECK); |
|
72 |
gdata2 = getButtonLayoutData(); |
|
73 |
gdata2.colspan = 4; // one line |
|
74 |
keepWordContractionsButton.setLayoutData(gdata2); |
|
75 |
|
|
76 |
udPropertiesPrefixButton = toolkit.createText(sectionClient, "UD properties prefix", SWT.CHECK); |
|
77 |
gdata2 = getButtonLayoutData(); |
|
78 |
gdata2.colspan = 4; // one line |
|
79 |
udPropertiesPrefixButton.setLayoutData(gdata2); |
|
80 |
|
|
81 |
Label tmp4Label = toolkit.createLabel(sectionClient, "Head properties to project"); |
|
82 |
tmp4Label.setToolTipText("Comma separated list of ud properties."); |
|
83 |
tmp4Label.setLayoutData(getLabelGridData()); |
|
84 |
|
|
85 |
headPropertiesText = toolkit.createText(sectionClient, "UD head properties to project (comma separated list)", SWT.BORDER); |
|
86 |
gdata2 = getTextGridData(); |
|
87 |
gdata2.colspan = 3; // one line |
|
88 |
headPropertiesText.setLayoutData(gdata2); |
|
89 |
|
|
90 |
tmp4Label = toolkit.createLabel(sectionClient, "Deps properties to project"); |
|
91 |
tmp4Label.setToolTipText("Comma separated list of ud properties."); |
|
92 |
tmp4Label.setLayoutData(getLabelGridData()); |
|
93 |
|
|
94 |
// build text edition or not button |
|
95 |
depsPropertiesText = toolkit.createText(sectionClient, "UD deps properties to project (comma separated list)", SWT.BORDER); |
|
96 |
gdata2 = getTextGridData(); |
|
97 |
gdata2.colspan = 3; // one line |
|
98 |
depsPropertiesText.setLayoutData(gdata2); |
|
99 |
} |
|
100 |
|
|
101 |
@Override |
|
102 |
public void updateFields(Project project) { |
|
103 |
if (this.section.isDisposed()) return; |
|
104 |
if (project == null) return; |
|
105 |
|
|
106 |
Preferences customNode = project.getImportParameters().node("conllu"); |
|
107 |
|
|
108 |
useNewDocIdButton.setSelection(customNode.getBoolean(UDPreferences.IMPORT_USE_NEW_DOC_ID, true)); //$NON-NLS-1$ |
|
109 |
keepWordContractionsButton.setSelection(customNode.getBoolean(UDPreferences.KEEP_CONTRACTIONS, true)); //$NON-NLS-1$ |
|
110 |
udPropertiesPrefixButton.setText(customNode.get(UDPreferences.UDPREFIX, "")); //$NON-NLS-1$ |
|
111 |
headPropertiesText.setText(customNode.get(UDPreferences.IMPORT_HEAD_TO_PROJECT, "upos,deprel")); //$NON-NLS-1$ |
|
112 |
depsPropertiesText.setText(customNode.get(UDPreferences.IMPORT_DEPS_TO_PROJECT, "upos,deprel")); //$NON-NLS-1$ |
|
113 |
} |
|
114 |
|
|
115 |
@Override |
|
116 |
public boolean saveFields(Project project) { |
|
117 |
if (this.section != null && !this.section.isDisposed()) { |
|
118 |
|
|
119 |
Preferences customNode = project.getImportParameters().node("conllu"); |
|
120 |
customNode.putBoolean(UDPreferences.IMPORT_USE_NEW_DOC_ID, useNewDocIdButton.getSelection()); //$NON-NLS-1$ |
|
121 |
customNode.putBoolean(UDPreferences.KEEP_CONTRACTIONS, keepWordContractionsButton.getSelection()); //$NON-NLS-1$ |
|
122 |
customNode.put(UDPreferences.UDPREFIX, udPropertiesPrefixButton.getText()); //$NON-NLS-1$ |
|
123 |
customNode.put(UDPreferences.IMPORT_HEAD_TO_PROJECT, headPropertiesText.getText()); //$NON-NLS-1$ |
|
124 |
customNode.put(UDPreferences.IMPORT_DEPS_TO_PROJECT, depsPropertiesText.getText()); //$NON-NLS-1$ |
|
125 |
} |
|
126 |
return true; |
|
127 |
} |
|
128 |
|
|
129 |
@Override |
|
130 |
public boolean checkFields() { |
|
131 |
return true; |
|
132 |
} |
|
133 |
|
|
134 |
@Override |
|
135 |
public int getSectionSize() { |
|
136 |
return SECTION_SIZE; |
|
137 |
} |
|
138 |
} |
|
0 | 139 |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/importsection/CoNLLUAnnotationSectionConfigurator.java (revision 3315) | ||
---|---|---|
1 |
package org.txm.conllu.rcp.importsection; |
|
2 |
|
|
3 |
import org.txm.rcp.editors.imports.ImportEditorSectionConfigurator; |
|
4 |
import org.txm.rcp.editors.imports.ImportModuleCustomization; |
|
5 |
|
|
6 |
|
|
7 |
public class CoNLLUAnnotationSectionConfigurator extends ImportEditorSectionConfigurator { |
|
8 |
|
|
9 |
@Override |
|
10 |
public void installSections() { |
|
11 |
|
|
12 |
ImportModuleCustomization.additionalSections.put("conlluLoader.groovy", CoNLLUSection.class); |
|
13 |
} |
|
14 |
} |
|
0 | 15 |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ExportCorpusAsCoNLLU.java (revision 3315) | ||
---|---|---|
1 |
// Copyright © 2010-2020 ENS de Lyon., University of Franche-Comté |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.conllu.rcp.commands; |
|
29 |
|
|
30 |
import java.io.File; |
|
31 |
import java.io.IOException; |
|
32 |
import java.io.PrintWriter; |
|
33 |
import java.util.ArrayList; |
|
34 |
import java.util.HashMap; |
|
35 |
import java.util.HashSet; |
|
36 |
|
|
37 |
import org.apache.commons.lang.StringUtils; |
|
38 |
import org.eclipse.core.commands.AbstractHandler; |
|
39 |
import org.eclipse.core.commands.ExecutionEvent; |
|
40 |
import org.eclipse.core.commands.ExecutionException; |
|
41 |
import org.eclipse.jface.viewers.IStructuredSelection; |
|
42 |
import org.eclipse.osgi.util.NLS; |
|
43 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
44 |
import org.kohsuke.args4j.Option; |
|
45 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
46 |
import org.txm.searchengine.cqp.CQPSearchEngine; |
|
47 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
48 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
|
49 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
|
50 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
|
51 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
52 |
import org.txm.searchengine.cqp.corpus.StructuralUnit; |
|
53 |
import org.txm.searchengine.cqp.corpus.WordProperty; |
|
54 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
|
55 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
56 |
import org.txm.utils.i18n.LangFormater; |
|
57 |
import org.txm.utils.io.IOUtils; |
|
58 |
import org.txm.utils.logger.Log; |
|
59 |
|
|
60 |
/** |
|
61 |
* Export the conllu properties and CQP words into a conllu corpus of several files (one per text) |
|
62 |
* |
|
63 |
* @author mdecorde. |
|
64 |
*/ |
|
65 |
public class ExportCorpusAsCoNLLU extends AbstractHandler { |
|
66 |
|
|
67 |
public static final String ID = ExportCorpusAsCoNLLU.class.getName(); |
|
68 |
|
|
69 |
@Option(name = "conlluResultDirectory", usage = "conlluResultDirectory", widget = "Folder", required = true, def = "conllu-result-directory") |
|
70 |
File conlluResultDirectory; |
|
71 |
|
|
72 |
@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud-") |
|
73 |
String propertiesPrefix; |
|
74 |
|
|
75 |
@Option(name = "separator", usage = "Options", widget = "Separator", required = true, def = "comment properties") |
|
76 |
Boolean separator = false; |
|
77 |
|
|
78 |
@Option(name = "insertParagraphs", usage = "Insert paragraph marks in the CoNLLU corpus", widget = "Boolean", required = true, def = "true") |
|
79 |
Boolean insertParagraphs = false; |
|
80 |
|
|
81 |
@Option(name = "detectGap", usage = "Insert gap comment using the CQP 'gap' property", widget = "Boolean", required = true, def = "true") |
|
82 |
Boolean detectGap = false; |
|
83 |
|
|
84 |
@Option(name = "separator3", usage = "Options", widget = "Separator", required = true, def = "tokens options") |
|
85 |
Boolean separator3 = false; |
|
86 |
|
|
87 |
@Option(name = "insertNoSpaceAfter", usage = "Insert the NoSpaceAfter misc property if not initially in the CoNLLU corpus", widget = "Boolean", required = true, def = "true") |
|
88 |
Boolean insertNoSpaceAfter = true; |
|
89 |
|
|
90 |
@Option(name = "insertTokenWithoutUdAnnotations", usage = "if checked words without ud annotations are exported as well", widget = "Boolean", required = false, def = "false") |
|
91 |
Boolean insertTokenWithoutUdAnnotations; |
|
92 |
|
|
93 |
// "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" }; |
|
94 |
@Option(name = "separator_properties", usage = "Options", widget = "Separator", required = true, def = "fixing UD properties") |
|
95 |
Boolean separator_properties = false; |
|
96 |
|
|
97 |
@Option(name = "defaultFormPropertyName", usage = "optional CQP property to fix the missing 'form' ud property", widget = "String", required = false, def = "") |
|
98 |
String defaultFormPropertyName; |
|
99 |
|
|
100 |
@Option(name = "defaultLemmaPropertyName", usage = "optional CQP property to fix the missing 'lemma' ud property", widget = "String", required = false, def = "") |
|
101 |
String defaultLemmaPropertyName; |
|
102 |
|
|
103 |
@Option(name = "defaultUposPropertyName", usage = "optional CQP property to fix the missing 'upos' ud property", widget = "String", required = false, def = "") |
|
104 |
String defaultUposPropertyName; |
|
105 |
|
|
106 |
@Option(name = "defaultXposPropertyName", usage = "optional CQP property to fix the missing 'xpos' ud property", widget = "String", required = false, def = "") |
|
107 |
String defaultXposPropertyName; |
|
108 |
|
|
109 |
@Option(name = "defaultFeatsPropertyName", usage = "optional CQP property to fix the missing 'feats' ud property", widget = "String", required = false, def = "") |
|
110 |
String defaultFeatsPropertyName; |
|
111 |
|
|
112 |
@Option(name = "defaultHeadPropertyName", usage = "optional CQP property to fix the missing 'head' ud property", widget = "String", required = false, def = "") |
|
113 |
String defaultHeadPropertyName; |
|
114 |
|
|
115 |
@Option(name = "defaultDeprelPropertyName", usage = "optional CQP property to fix the missing 'deprel' ud property", widget = "String", required = false, def = "") |
|
116 |
String defaultDeprelPropertyName; |
|
117 |
|
|
118 |
@Option(name = "defaultDepsPropertyName", usage = "optional CQP property to fix the missing 'deps' ud property", widget = "String", required = false, def = "") |
|
119 |
String defaultDepsPropertyName; |
|
120 |
|
|
121 |
@Option(name = "defaultMiscPropertyName", usage = "optional CQP property to fix the missing 'misc' ud property", widget = "String", required = false, def = "") |
|
122 |
String defaultMiscPropertyName; |
|
123 |
|
|
124 |
@Option(name = "separator2", usage = "Options", widget = "Separator", required = true, def = "sentence fix options") |
|
125 |
Boolean separator2 = false; |
|
126 |
|
|
127 |
@Option(name = "openingPunct", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "[\\-–«‘“\\(]") |
|
128 |
String openingPunct; |
|
129 |
|
|
130 |
/** |
|
131 |
* the UD property suffixes, will be used to create the CQP properties like propertiesPrefix + suffix |
|
132 |
*/ |
|
133 |
public static String[] propNames = { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" }; |
|
134 |
|
|
135 |
/* |
|
136 |
* (non-Javadoc) |
|
137 |
* @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent) |
|
138 |
*/ |
|
139 |
@Override |
|
140 |
public Object execute(final ExecutionEvent event) throws ExecutionException { |
|
141 |
|
|
142 |
IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event); |
|
143 |
|
|
144 |
Object s = selection.getFirstElement(); |
|
145 |
if (!(s instanceof MainCorpus)) { |
|
146 |
Log.warning("Selection is not a corpus. Aborting."); |
|
147 |
return null; |
|
148 |
} |
|
149 |
|
|
150 |
if (!ParametersDialog.open(this)) { |
|
151 |
return null; |
|
152 |
} |
|
153 |
|
|
154 |
conlluResultDirectory.mkdirs(); |
|
155 |
if (conlluResultDirectory == null || !conlluResultDirectory.exists() || !conlluResultDirectory.isDirectory()) { |
|
156 |
Log.warning("Error: conllu result directory does not exists: " + conlluResultDirectory); |
|
157 |
return null; |
|
158 |
} |
|
159 |
|
|
160 |
CQPCorpus corpus = (CQPCorpus) s; |
|
161 |
MainCorpus mainCorpus = corpus.getMainCorpus(); |
|
162 |
|
|
163 |
try { |
|
164 |
return exportAnnotationsAsCorpus(mainCorpus, conlluResultDirectory, propertiesPrefix, openingPunct, insertTokenWithoutUdAnnotations, |
|
165 |
defaultFormPropertyName, defaultLemmaPropertyName, defaultUposPropertyName, defaultXposPropertyName, |
|
166 |
defaultFeatsPropertyName, defaultHeadPropertyName, defaultDeprelPropertyName, defaultDepsPropertyName, |
|
167 |
defaultMiscPropertyName, |
|
168 |
detectGap, insertParagraphs, insertNoSpaceAfter); |
|
169 |
} |
|
170 |
catch (Exception e) { |
|
171 |
Log.warning(e); |
|
172 |
Log.printStackTrace(e); |
|
173 |
} |
|
174 |
|
|
175 |
return null; |
|
176 |
} |
|
177 |
|
|
178 |
/** |
|
179 |
* export the corpus in a directory of conllu files (one per text) |
|
180 |
* |
|
181 |
* @param mainCorpus |
|
182 |
* @param conlluResultDirectory |
|
183 |
* @param prefix |
|
184 |
* @param openingPunct |
|
185 |
* @param insertTokenWithoutUdAnnotations |
|
186 |
* @param defaultFormPropertyName |
|
187 |
* @param defaultLemmaPropertyName |
|
188 |
* @param defaultUposPropertyName |
|
189 |
* @param defaultXposPropertyName |
|
190 |
* @param detectGap |
|
191 |
* @param insertParagraphs |
|
192 |
* @param insertNoSpaceAfter |
|
193 |
* @return the number of annotation exported |
|
194 |
* @throws UnexpectedAnswerException |
|
195 |
* @throws IOException |
|
196 |
* @throws CqiServerError |
|
197 |
* @throws CqiClientException |
|
198 |
* @throws InvalidCqpIdException |
|
199 |
*/ |
|
200 |
public static int exportAnnotationsAsCorpus(MainCorpus mainCorpus, File conlluResultDirectory, String prefix, String openingPunct, boolean insertTokenWithoutUdAnnotations, |
|
201 |
String defaultFormPropertyName, String defaultLemmaPropertyName, String defaultUposPropertyName, String defaultXposPropertyName, |
|
202 |
String defaultFeatsPropertyName, String defaultHeadPropertyName, String defaultDeprelPropertyName, String defaultDepsPropertyName, |
|
203 |
String defaultMiscPropertyName, |
|
204 |
boolean detectGap, boolean insertParagraphs, boolean insertNoSpaceAfter) |
|
205 |
throws UnexpectedAnswerException, |
|
206 |
IOException, |
|
207 |
CqiServerError, |
|
208 |
CqiClientException, InvalidCqpIdException { |
|
209 |
|
|
210 |
if (!conlluResultDirectory.exists()) { |
|
211 |
conlluResultDirectory.mkdirs(); |
|
212 |
} |
|
213 |
int numberOfWordsWritten = 0; |
|
214 |
int numberOfSentencesWritten = 0; |
|
215 |
int numberOfTextsWritten = 0; |
|
216 |
|
|
217 |
String[] textIds = mainCorpus.getCorpusTextIdsList(); |
|
218 |
int[] start_limits = mainCorpus.getTextStartLimits(); |
|
219 |
int[] end_limits = mainCorpus.getTextEndLimits(); |
|
220 |
|
|
221 |
String lang = mainCorpus.getLang(); |
|
222 |
// HashSet<String> beforeSpacesRules = new HashSet<>(LangFormater.getNoSpaceBefore(mainCorpus.getLang())); |
|
223 |
// HashSet<String> afterSpacesRules = new HashSet<>(LangFormater.getNoSpaceAfter(mainCorpus.getLang())); |
|
224 |
|
|
225 |
for (String p : propNames) { |
|
226 |
WordProperty wp = mainCorpus.getProperty(prefix + p); |
|
227 |
if (wp == null) { |
|
228 |
Log.warning("Error: cannot find the Conllu property: " + prefix + p); |
|
229 |
return 0; |
|
230 |
} |
|
231 |
} |
|
232 |
|
|
233 |
if (insertTokenWithoutUdAnnotations && (defaultFormPropertyName == null || mainCorpus.getProperty(defaultFormPropertyName) == null)) { |
|
234 |
Log.warning("Error: the defaultFormPropertyName parameter needs to be set if insertTokenWithoutUdAnnotations is set to true"); |
|
235 |
return 0; |
|
236 |
} |
|
237 |
|
|
238 |
for (int iText = 0; iText < start_limits.length; iText++) { |
|
239 |
|
|
240 |
// Build corpus positions |
|
241 |
int[] positions = new int[end_limits[iText] - start_limits[iText] + 1]; |
|
242 |
int tmp = 0; |
|
243 |
for (int n = start_limits[iText]; n <= end_limits[iText]; n++) { |
|
244 |
positions[tmp++] = n; |
|
245 |
} |
|
246 |
numberOfWordsWritten += positions.length; |
|
247 |
|
|
248 |
// Get UD properties |
|
249 |
WordProperty wp; |
|
250 |
wp = mainCorpus.getProperty(prefix + "id"); |
|
251 |
String[] tmpValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
252 |
int[] idValues = new int[tmpValues.length]; |
|
253 |
for (int iId = 0; iId < tmpValues.length; iId++) { |
|
254 |
if (tmpValues[iId].length() > 0 && !tmpValues[iId].equals("_") && !tmpValues[iId].equals("__UNDEF__")) { |
|
255 |
idValues[iId] = Integer.parseInt(tmpValues[iId]); |
|
256 |
} |
|
257 |
else { |
|
258 |
idValues[iId] = 0; |
|
259 |
} |
|
260 |
} |
|
261 |
tmpValues = null; |
|
262 |
|
|
263 |
WordProperty formWordProperty = mainCorpus.getProperty(prefix + "form"); |
|
264 |
String[] formValues = CQPSearchEngine.getCqiClient().cpos2Str(formWordProperty.getQualifiedName(), positions); |
|
265 |
fixUNDEFValues(formValues); |
|
266 |
|
|
267 |
wp = mainCorpus.getProperty(prefix + "lemma"); |
|
268 |
String[] lemmaValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
269 |
fixUNDEFValues(lemmaValues); |
|
270 |
|
|
271 |
wp = mainCorpus.getProperty(prefix + "upos"); |
|
272 |
String[] uposValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
273 |
fixUNDEFValues(uposValues); |
|
274 |
|
|
275 |
wp = mainCorpus.getProperty(prefix + "xpos"); |
|
276 |
String[] xposValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
277 |
fixUNDEFValues(xposValues); |
|
278 |
|
|
279 |
wp = mainCorpus.getProperty(prefix + "feats"); |
|
280 |
String[] featsValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
281 |
fixUNDEFValues(featsValues); |
|
282 |
|
|
283 |
wp = mainCorpus.getProperty(prefix + "head"); |
|
284 |
// String[] headValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
285 |
tmpValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
286 |
int[] headValues = new int[tmpValues.length]; |
|
287 |
for (int iId = 0; iId < tmpValues.length; iId++) { |
|
288 |
if (tmpValues[iId].length() > 0 && !tmpValues[iId].equals("_") && !tmpValues[iId].equals("__UNDEF__")) { |
|
289 |
headValues[iId] = Integer.parseInt(tmpValues[iId]); |
|
290 |
} |
|
291 |
else { |
|
292 |
headValues[iId] = -1; |
|
293 |
} |
|
294 |
} |
|
295 |
tmpValues = null; |
|
296 |
|
|
297 |
wp = mainCorpus.getProperty(prefix + "deprel"); |
|
298 |
String[] deprelValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
299 |
fixUNDEFValues(deprelValues); |
|
300 |
|
|
301 |
wp = mainCorpus.getProperty(prefix + "deps"); |
|
302 |
String[] depsValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
303 |
fixUNDEFValues(depsValues); |
|
304 |
|
|
305 |
wp = mainCorpus.getProperty(prefix + "misc"); |
|
306 |
String[] miscValues = CQPSearchEngine.getCqiClient().cpos2Str(wp.getQualifiedName(), positions); |
|
307 |
fixUNDEFValues(miscValues); |
|
308 |
|
|
309 |
HashSet<Integer> paragraphsStartPositions = new HashSet<>(); |
|
310 |
if (insertParagraphs) { |
|
311 |
StructuralUnit p_struct = mainCorpus.getStructuralUnit("p"); |
|
312 |
if (p_struct == null) { |
|
313 |
Log.warning(NLS.bind("** insertParagraph parameter is set, but there are no *p* structure (no paragraph) in the {0} CQP corpus. The insertParagraph parameter will be ignored.", |
|
314 |
mainCorpus)); |
|
315 |
} |
|
316 |
else { |
|
317 |
for (int position : mainCorpus.query(new CQLQuery("<p> [_.text_id=\"" + textIds[iText] + "\"]"), "textParagraphPositions", false).getStarts()) { |
|
318 |
paragraphsStartPositions.add(position); |
|
319 |
} |
|
320 |
} |
|
321 |
} |
|
322 |
|
|
323 |
HashMap<Integer, String> sentidStartPositions = getNonUNDEFPositionsAndValues(mainCorpus, prefix+"sentid"); |
|
324 |
HashMap<Integer, String> newdocidStartPositions = getNonUNDEFPositionsAndValues(mainCorpus, prefix+"newdocid"); |
|
325 |
|
|
326 |
// build sentence, first pass using UD word sentence positions |
|
327 |
ArrayList<ArrayList<Integer>> sentences = new ArrayList<>(); |
|
328 |
ArrayList<Integer> tmpSentence = new ArrayList<>(); |
|
329 |
for (int p = 0; p < positions.length; p++) { |
|
330 |
// System.out.println("p=" + p + " id=" + idValues[p] + " form=" + formValues[p] + " lemma=" + lemmaValues[p] + " upos=" + uposValues[p] + " xpos=" + xposValues[p] + " feats=" |
|
331 |
// + featsValues[p] + " head=" |
|
332 |
// + headValues[p] + " deprel=" + deprelValues[p] + " deps=" + depsValues[p] + " misc=" + miscValues[p]); |
|
333 |
if (sentidStartPositions.containsKey(p)) { // new ud sentence |
|
334 |
|
|
335 |
if (tmpSentence.size() > 0) { |
|
336 |
sentences.add(new ArrayList<>(tmpSentence)); |
|
337 |
} |
|
338 |
|
|
339 |
// System.out.println("new sentence: " + " id=" + idValues[p] + " form=" + formValues[p] + " lemma=" + lemmaValues[p] + " upos=" + uposValues[p] + " xpos=" + xposValues[p] + " |
|
340 |
// feats=" |
|
341 |
// + featsValues[p] + " head=" |
|
342 |
// + headValues[p] + " deprel=" + deprelValues[p] + " deps=" + depsValues[p] + " misc=" + miscValues[p]); |
|
343 |
tmpSentence.clear(); |
|
344 |
} |
|
345 |
|
|
346 |
if (insertTokenWithoutUdAnnotations) { |
|
347 |
tmpSentence.add(p); // insert all tokens |
|
348 |
} |
|
349 |
else if (idValues[p] != 0) { |
|
350 |
tmpSentence.add(p); // insert all tokens |
|
351 |
} |
|
352 |
|
|
353 |
} |
|
354 |
positions = null; // free memory |
|
355 |
|
|
356 |
// fixing sentences |
|
357 |
for (int s = 0; s < sentences.size(); s++) { |
|
358 |
|
|
359 |
// fix only ud sentences limits |
|
360 |
ArrayList<Integer> sentence = sentences.get(s); |
|
361 |
|
|
362 |
if (sentidStartPositions.get(sentence.get(0)) == null) { |
|
363 |
continue; // this is not a UD sentence |
|
364 |
} |
|
365 |
|
|
366 |
int max = -1; |
|
367 |
int imax = 0; |
|
368 |
for (int ip = 0; ip < sentence.size(); ip++) { |
|
369 |
int p = sentence.get(ip); |
|
370 |
if (max < idValues[p]) { |
|
371 |
max = idValues[p]; |
|
372 |
imax = ip; |
|
373 |
} |
|
374 |
} |
|
375 |
|
|
376 |
ArrayList<Integer> newSentence = new ArrayList<>(); |
|
377 |
for (int ip = imax + 1; ip < sentence.size(); ip++) { |
|
378 |
newSentence.add(sentence.get(ip)); |
|
379 |
sentence.remove(ip); |
|
380 |
ip--; |
|
381 |
} |
|
382 |
if (newSentence.size() == 1) { // the new sentence size is 1, resinsert it |
|
383 |
sentence.addAll(newSentence); |
|
384 |
newSentence.clear(); |
|
385 |
} |
|
386 |
if (newSentence.size() > 0) { |
|
387 |
//System.out.println("INSERT " + newSentence); |
|
388 |
sentences.add(s + 1, newSentence); |
|
389 |
} |
|
390 |
} |
|
391 |
|
|
392 |
if (tmpSentence.size() > 0) { // add last sentence |
|
393 |
sentences.add(new ArrayList<>(tmpSentence)); |
|
394 |
} |
|
395 |
|
|
396 |
// fixing sentence __NULL__ ud properties |
|
397 |
for (int iSentence = 0; iSentence < sentences.size(); iSentence++) { |
|
398 |
ArrayList<Integer> sentence = sentences.get(iSentence); |
|
399 |
|
|
400 |
int[] sentencePositions = new int[sentence.size()]; |
|
401 |
for (int p = 0; p < sentence.size(); p++) { |
|
402 |
sentencePositions[p] = sentence.get(p); |
|
403 |
} |
|
404 |
|
|
405 |
// get CQP values fixing "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps" |
|
406 |
String[] ids = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty("id").getQualifiedName(), sentencePositions); |
|
407 |
|
|
408 |
String[] words = null; |
|
409 |
if (defaultFormPropertyName != null && defaultFormPropertyName.length() > 0) { |
|
410 |
words = getDefaultValues(mainCorpus, defaultFormPropertyName, sentencePositions); |
|
411 |
} |
|
412 |
String[] lemmas = null; |
|
413 |
if (defaultLemmaPropertyName != null && defaultLemmaPropertyName.length() > 0) { |
|
414 |
lemmas = getDefaultValues(mainCorpus, defaultLemmaPropertyName, sentencePositions); |
|
415 |
} |
|
416 |
String[] uposs = null; |
|
417 |
if (defaultUposPropertyName != null && defaultUposPropertyName.length() > 0) { |
|
418 |
uposs = getDefaultValues(mainCorpus, defaultUposPropertyName, sentencePositions); |
|
419 |
} |
|
420 |
String[] xposs = null; |
|
421 |
if (defaultXposPropertyName != null && defaultXposPropertyName.length() > 0) { |
|
422 |
xposs = getDefaultValues(mainCorpus, defaultXposPropertyName, sentencePositions); |
|
423 |
} |
|
424 |
|
|
425 |
String[] feats = null; |
|
426 |
if (defaultFeatsPropertyName != null && defaultFeatsPropertyName.length() > 0) { |
|
427 |
feats = getDefaultValues(mainCorpus, defaultFeatsPropertyName, sentencePositions); |
|
428 |
} |
|
429 |
String[] heads = null; |
|
430 |
if (defaultHeadPropertyName != null && defaultHeadPropertyName.length() > 0) { |
|
431 |
heads = getDefaultValues(mainCorpus, defaultHeadPropertyName, sentencePositions); |
|
432 |
} |
|
433 |
String[] deprels = null; |
|
434 |
if (defaultDeprelPropertyName != null && defaultDeprelPropertyName.length() > 0) { |
|
435 |
deprels = getDefaultValues(mainCorpus, defaultDeprelPropertyName, sentencePositions); |
|
436 |
} |
|
437 |
String[] depss = null; |
|
438 |
if (defaultDepsPropertyName != null && defaultDepsPropertyName.length() > 0) { |
|
439 |
depss = getDefaultValues(mainCorpus, defaultDepsPropertyName, sentencePositions); |
|
440 |
} |
|
441 |
String[] miscs = null; |
|
442 |
if (defaultMiscPropertyName != null && defaultMiscPropertyName.length() > 0) { |
|
443 |
miscs = getDefaultValues(mainCorpus, defaultMiscPropertyName, sentencePositions); |
|
444 |
} |
|
445 |
|
|
446 |
// String[] feats = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(featsCorrPropertyName).getQualifiedName(), sentencePositions); |
|
447 |
// String[] head = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(headCorrPropertyName).getQualifiedName(), sentencePositions); |
|
448 |
// String[] deprel = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(deprelCorrPropertyName).getQualifiedName(), sentencePositions); |
|
449 |
// String[] deps = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(deprelCorrPropertyName).getQualifiedName(), sentencePositions); |
|
450 |
|
|
451 |
// fix ud properties using CQP values |
|
452 |
for (int ip = 0; ip < sentence.size(); ip++) { |
|
453 |
|
|
454 |
int p = sentence.get(ip); |
|
455 |
|
|
456 |
// new word |
|
457 |
if (miscValues[p].equals("_")) { |
|
458 |
miscValues[p] = "XmlId=" + ids[ip]; |
|
459 |
} |
|
460 |
|
|
461 |
// { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" }; |
|
462 |
if (words != null && formValues[p].equals("_")) { |
|
463 |
formValues[p] = words[ip]; |
|
464 |
} |
|
465 |
if (lemmas != null && lemmaValues[p].equals("_")) { |
|
466 |
lemmaValues[p] = lemmas[ip]; |
|
467 |
} |
|
468 |
if (uposs != null && uposValues[p].equals("_")) { |
|
469 |
uposValues[p] = uposs[ip]; |
|
470 |
} |
|
471 |
if (xposs != null && xposValues[p].equals("_")) { |
|
472 |
xposValues[p] = xposs[ip]; |
|
473 |
} |
|
474 |
if (feats != null && featsValues[p].equals("_")) { |
|
475 |
featsValues[p] = feats[ip]; |
|
476 |
} |
|
477 |
if (heads != null && headValues[p] < 0) { |
|
478 |
headValues[p] = Integer.parseInt(heads[ip]); |
|
479 |
} |
|
480 |
if (deprels != null && deprelValues[p].equals("_")) { |
|
481 |
deprelValues[p] = deprels[ip]; |
|
482 |
} |
|
483 |
if (depss != null && depsValues[p].equals("_")) { |
|
484 |
depsValues[p] = depss[ip]; |
|
485 |
} |
|
486 |
if (miscs != null && miscValues[p].equals("_")) { |
|
487 |
miscValues[p] = miscs[ip]; |
|
488 |
} |
|
489 |
} |
|
490 |
|
|
491 |
if (insertNoSpaceAfter) { |
|
492 |
for (int ip = 0; ip < sentence.size(); ip++) { // fix SpaceAfter. !!! this needs to be done after ud properties are fixed |
|
493 |
int p = sentence.get(ip); |
|
494 |
// insertion activated |
|
495 |
if (!miscValues[p].contains("SpaceAfter=")) { // only update if not present |
|
496 |
if (LangFormater.isSpaceAfterNotNeeded(formValues[p], lang)) { |
|
497 |
miscValues[p] += "|SpaceAfter=No"; |
|
498 |
} |
|
499 |
else if (formValues.length > (p + 1) && LangFormater.isSpaceBeforeNotNeeded(formValues[p + 1], lang)) { |
|
500 |
// if the next token must not be preceded by a space, set SpaceAfter=No on the current token |
|
501 |
miscValues[p] += "|SpaceAfter=No"; |
|
502 |
} |
|
503 |
} |
|
504 |
} |
|
505 |
} |
|
506 |
|
|
507 |
// fixing sentence punct limits |
|
508 |
while (sentence.size() > 0 && iSentence > 0 && formValues[sentence.get(0)].matches("\\p{P}") && !formValues[sentence.get(0)].matches(openingPunct)) { |
|
509 |
// System.out.println("FIXING: first non-openingPunct position " + formValues[sentence.get(0)] + " in " + iSentence); |
|
510 |
int p2 = sentence.remove(0); |
|
511 |
sentences.get(iSentence - 1).add(p2); |
|
512 |
} |
|
513 |
// |
|
514 |
while (sentence.size() > 0 && iSentence + 1 < sentences.size() && formValues[sentence.get(sentence.size() - 1)].matches(openingPunct)) { |
|
515 |
// System.out.println("FIXING: last openingPunct position " + formValues[sentence.get(sentence.size() - 1)] + " in " + iSentence); |
|
516 |
int p2 = sentence.remove(sentence.size() - 1); |
|
517 |
sentences.get(iSentence + 1).add(0, p2); |
|
518 |
} |
|
519 |
|
|
520 |
if (sentence.size() == 0) { // sentence was depleted after fixing it |
|
521 |
sentences.remove(iSentence); |
|
522 |
iSentence--; |
|
523 |
continue; |
|
524 |
} |
|
525 |
} |
|
526 |
|
|
527 |
for (int s = 0; s < sentences.size(); s++) { |
|
528 |
|
|
529 |
// fix only ud sentences limits |
|
530 |
ArrayList<Integer> sentence = sentences.get(s); |
|
531 |
HashMap<Integer, Integer> oldToNewIds = new HashMap<>(); |
|
532 |
for (int ip = 0; ip < sentence.size(); ip++) { // computing old to new ids |
|
533 |
int p = sentence.get(ip); |
|
534 |
|
|
535 |
if (idValues[p] != 0) { // store "old id -> new id" |
|
536 |
oldToNewIds.put(idValues[p], (ip + 1)); // from 1 to N |
|
537 |
} |
|
538 |
} |
|
539 |
|
|
540 |
// fixing head and set missing head to 0 and root |
|
541 |
for (int ip = 0; ip < sentence.size(); ip++) { |
|
542 |
int p = sentence.get(ip); |
|
543 |
|
|
544 |
// fixing id value |
|
545 |
idValues[p] = (ip + 1); // from 1 to N |
|
546 |
|
|
547 |
// fixing head values |
|
548 |
if (oldToNewIds.containsKey(headValues[p])) { |
|
549 |
headValues[p] = oldToNewIds.get(headValues[p]); |
|
550 |
} |
|
551 |
else if (headValues[p] != 0) { // new word, set to default values |
|
552 |
headValues[p] = 0; |
|
553 |
deprelValues[p] = "_"; |
|
554 |
depsValues[p] = "_"; |
|
555 |
} |
|
556 |
} |
|
557 |
} |
|
558 |
|
|
559 |
// writing sentences |
|
560 |
File resultConlluFile = new File(conlluResultDirectory, textIds[iText] + ".conllu"); |
|
561 |
PrintWriter writer = IOUtils.getWriter(resultConlluFile); |
|
562 |
|
|
563 |
int iParagraph = 1; |
|
564 |
|
|
565 |
for (int iSentence = 0; iSentence < sentences.size(); iSentence++) { |
|
566 |
ArrayList<Integer> sentence = sentences.get(iSentence); |
|
567 |
|
|
568 |
int[] sentencePositions = new int[sentence.size()]; |
|
569 |
for (int p = 0; p < sentence.size(); p++) { |
|
570 |
sentencePositions[p] = sentence.get(p); |
|
571 |
} |
|
572 |
|
|
573 |
String[] gap = null; |
|
574 |
if (detectGap && mainCorpus.getProperty("gap") != null) { |
|
575 |
gap = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty("gap").getQualifiedName(), sentencePositions); |
|
576 |
} |
|
577 |
|
|
578 |
String[] tokens = new String[sentence.size()]; |
|
579 |
for (int ip = 0; ip < sentence.size(); ip++) { |
|
580 |
tokens[ip] = formValues[sentence.get(ip)]; |
|
581 |
} |
|
582 |
|
|
583 |
if (insertNoSpaceAfter) { |
|
584 |
writer.println("# text = " + LangFormater.format(StringUtils.join(tokens, " "), mainCorpus.getLang())); |
|
585 |
} |
|
586 |
else { |
|
587 |
writer.println("# text = " + StringUtils.join(tokens, " ")); |
|
588 |
} |
|
589 |
|
|
590 |
if (newdocidStartPositions.containsKey(sentence.get(0))) { |
|
591 |
writer.println("# newdoc id = " + newdocidStartPositions.get(sentence.get(0))); |
|
592 |
} |
|
593 |
else { |
|
594 |
writer.println("# newdoc id = " + textIds[iText]); |
|
595 |
} |
|
596 |
|
|
597 |
boolean foundSentId = false; |
|
598 |
for (int ip : sentence) { |
|
599 |
if (!foundSentId && sentidStartPositions.containsKey(ip)) { |
|
600 |
writer.println("# sent_id = " + sentidStartPositions.get(ip)); |
|
601 |
foundSentId = true; |
|
602 |
} |
|
603 |
} |
|
604 |
if (!foundSentId) { // no sent_id found |
|
605 |
writer.println("# sent_id = " + textIds[iText] + "-" + (iSentence + 1) + ".new"); |
|
606 |
} |
|
607 |
|
|
608 |
if (paragraphsStartPositions.contains(sentence.get(0))) { // paragraphsStartPositions is empty if the injectParagraph option is not set |
|
609 |
writer.println("# newpar id = " + iParagraph); |
|
610 |
iParagraph++; |
|
611 |
} |
|
612 |
|
|
613 |
for (int ip = 0; ip < sentence.size(); ip++) { |
|
614 |
int p = sentence.get(ip); |
|
615 |
|
|
616 |
// { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" }; |
|
617 |
writer.println(idValues[p] + "\t" + formValues[p] + "\t" + lemmaValues[p] + "\t" + uposValues[p] |
|
618 |
+ "\t" + xposValues[p] + "\t" + featsValues[p] + "\t" + headValues[p] + "\t" + deprelValues[p] |
|
619 |
+ "\t" + depsValues[p] + "\t" + miscValues[p]); |
|
620 |
|
|
621 |
if (gap != null && gap[ip].equals("next")) { |
|
622 |
writer.println("# gap"); |
|
623 |
} |
|
624 |
} |
|
625 |
writer.println(""); |
|
626 |
numberOfSentencesWritten++; |
|
627 |
} |
|
628 |
writer.close(); |
|
629 |
|
|
630 |
System.out.println(" Text done: " + resultConlluFile); |
|
631 |
numberOfTextsWritten++; |
|
632 |
} |
|
633 |
|
|
634 |
System.out.println("# words written: " + numberOfWordsWritten); |
|
635 |
System.out.println("# sentences written: " + numberOfSentencesWritten); |
|
636 |
System.out.println("# texts written: " + numberOfTextsWritten); |
|
637 |
|
|
638 |
return numberOfWordsWritten; |
|
639 |
} |
|
640 |
|
|
641 |
private static String[] getDefaultValues(MainCorpus mainCorpus, String property, int[] positions) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException { |
|
642 |
String[] values = CQPSearchEngine.getCqiClient().cpos2Str(mainCorpus.getProperty(property).getQualifiedName(), positions); |
|
643 |
for (int iupos = 0; iupos < values.length; iupos++) { // recode the || CQP multiple values to ud multiple values |
|
644 |
if (values[iupos].length() > 2 && values[iupos].startsWith("|") && values[iupos].endsWith("|")) { |
|
645 |
values[iupos] = values[iupos].substring(1, values[iupos].length() - 1); |
|
646 |
} |
|
647 |
} |
|
648 |
|
|
649 |
return values; |
|
650 |
} |
|
651 |
|
|
652 |
private static HashMap<Integer, String> getNonUNDEFPositionsAndValues(MainCorpus mainCorpus, String property) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException { |
|
653 |
HashMap<Integer, String> sentidStartPositions = new HashMap<>(); |
|
654 |
int[] ids = CQPSearchEngine.getCqiClient().regex2Id(mainCorpus.getProperty(property).getQualifiedName(), "(?!__UNDEF__).+"); |
|
655 |
String[] strs = CQPSearchEngine.getCqiClient().id2Str(mainCorpus.getProperty(property).getQualifiedName(), ids); |
|
656 |
for (int iId = 0; iId < ids.length; iId++) { |
|
657 |
int id = ids[iId]; |
|
658 |
int[] pp = CQPSearchEngine.getCqiClient().id2Cpos(mainCorpus.getProperty(property).getQualifiedName(), id); |
|
659 |
for (int p : pp) { |
|
660 |
sentidStartPositions.put(p, strs[iId]); |
|
661 |
} |
|
662 |
} |
|
663 |
|
|
664 |
return sentidStartPositions; |
|
665 |
} |
|
666 |
|
|
667 |
private static void fixUNDEFValues(String[] values) { |
|
668 |
for (int i = 0; i < values.length; i++) { |
|
669 |
if (values[i].equals("__UNDEF__") || values[i].equals("")) { |
|
670 |
values[i] = "_"; |
|
671 |
} |
|
672 |
} |
|
673 |
} |
|
674 |
} |
|
675 |
|
|
676 |
|
|
0 | 677 |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ImportCoNLLUAnnotationsFromDirectory.java (revision 3315) | ||
---|---|---|
1 |
// Copyright © 2010-2020 ENS de Lyon., University of Franche-Comté |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.conllu.rcp.commands; |
|
29 |
|
|
30 |
import java.io.File; |
|
31 |
import java.io.FileFilter; |
|
32 |
import java.io.IOException; |
|
33 |
import java.util.Arrays; |
|
34 |
import java.util.HashSet; |
|
35 |
import java.util.Set; |
|
36 |
|
|
37 |
import javax.xml.stream.XMLStreamException; |
|
38 |
|
|
39 |
import org.eclipse.core.commands.AbstractHandler; |
|
40 |
import org.eclipse.core.commands.ExecutionEvent; |
|
41 |
import org.eclipse.core.commands.ExecutionException; |
|
42 |
import org.eclipse.jface.viewers.IStructuredSelection; |
|
43 |
import org.eclipse.osgi.util.NLS; |
|
44 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
45 |
import org.kohsuke.args4j.Option; |
|
46 |
import org.txm.conllu.core.function.ImportCoNLLUAnnotations; |
|
47 |
import org.txm.conllu.core.preferences.UDPreferences; |
|
48 |
import org.txm.core.messages.TXMCoreMessages; |
|
49 |
import org.txm.rcp.commands.workspace.UpdateCorpus; |
|
50 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
51 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
52 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
|
53 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
54 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
55 |
import org.txm.utils.logger.Log; |
|
56 |
|
|
57 |
/** |
|
58 |
* Import CoNLLU annotations into a TXM corpus |
|
59 |
* |
|
60 |
* If the corpus already contains CoNLLU annotations, they are replaced |
|
61 |
* |
|
62 |
* @author mdecorde. |
|
63 |
*/ |
|
64 |
public class ImportCoNLLUAnnotationsFromDirectory extends AbstractHandler { |
|
65 |
|
|
66 |
public static final String ID = ImportCoNLLUAnnotationsFromDirectory.class.getName(); |
|
67 |
|
|
68 |
@Option(name = "conlluDirectory", usage = "conlluDirectory", widget = "Folder", required = true, def = "conllu-directory") |
|
69 |
File conlluDirectory; |
|
70 |
|
|
71 |
@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud-") |
|
72 |
String propertiesPrefix; |
|
73 |
|
|
74 |
@Option(name = "udPropertiesToImport", usage = "to create the ud properties", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "form,lemma,upos,xpos,feats,head,deprel,deps,misc") |
|
75 |
String udPropertiesToImport; |
|
76 |
|
|
77 |
@Option(name = "overwrite_cqp_properties", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false") |
|
78 |
Boolean overwrite_cqp_properties; |
|
79 |
|
|
80 |
@Option(name = "normalize_word_ids", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false") |
|
81 |
Boolean normalize_word_ids; |
|
82 |
|
|
83 |
@Option(name = "headPropertiesToProject", usage = "to create the head-XYZ properties from the word head", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "deprel,upos") |
|
84 |
String headPropertiesToProject; |
|
85 |
|
|
86 |
@Option(name = "depsPropertiesToProject", usage = "to create the dep-XYZ from the word dependancies", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "deprel,upos") |
|
87 |
String depsPropertiesToProject; |
|
88 |
|
|
89 |
/* |
|
90 |
* (non-Javadoc) |
|
91 |
* @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent) |
|
92 |
*/ |
|
93 |
@Override |
|
94 |
public Object execute(final ExecutionEvent event) throws ExecutionException { |
|
95 |
|
|
96 |
IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event); |
|
97 |
|
|
98 |
Object s = selection.getFirstElement(); |
|
99 |
if (!(s instanceof MainCorpus)) { |
|
100 |
Log.warning("Selection is not a corpus. Aborting."); |
|
101 |
return null; |
|
102 |
} |
|
103 |
|
|
104 |
if (!ParametersDialog.open(this)) { |
|
105 |
return null; |
|
106 |
} |
|
107 |
if (conlluDirectory == null || !conlluDirectory.exists() || !conlluDirectory.isDirectory() || conlluDirectory.listFiles().length == 0) { |
|
108 |
Log.warning("Error: conllu directory is empty: " + conlluDirectory); |
|
109 |
return null; |
|
110 |
} |
|
111 |
|
|
112 |
CQPCorpus corpus = (CQPCorpus) s; |
|
113 |
MainCorpus mainCorpus = corpus.getMainCorpus(); |
|
114 |
|
|
115 |
try { |
|
116 |
HashSet<String> test = new HashSet<>(); |
|
117 |
for (String p : ImportCoNLLUAnnotations.UD_PROPERTY_NAMES) { |
|
118 |
if (mainCorpus.getProperty(propertiesPrefix + p) != null) { |
|
119 |
test.add(propertiesPrefix + p); |
|
120 |
} |
|
121 |
} |
|
122 |
if (test.size() > 0 && !overwrite_cqp_properties) { |
|
123 |
Log.warning(NLS.bind("Error: can't use the {0} prefix because some properties are already used: {1}", propertiesPrefix, test)); |
|
124 |
return null; |
|
125 |
} |
|
126 |
|
|
127 |
return importAnnotations(mainCorpus, conlluDirectory, propertiesPrefix, normalize_word_ids, |
|
128 |
new HashSet<String>(Arrays.asList(headPropertiesToProject.split(","))), |
|
129 |
new HashSet<String>(Arrays.asList(depsPropertiesToProject.split(","))), |
|
130 |
new HashSet<String>(Arrays.asList(udPropertiesToImport.split(",")))); |
|
131 |
} |
|
132 |
catch (Exception e) { |
|
133 |
Log.warning(e); |
|
134 |
e.printStackTrace(); |
|
135 |
} |
|
136 |
|
|
137 |
return null; |
|
138 |
} |
|
139 |
|
|
140 |
/** |
|
141 |
* |
|
142 |
* if import CoNLLU annotations in the corpus with the same name already exists, it is replaced |
|
143 |
* |
|
144 |
* @param corpus |
|
145 |
* @param conlluDirectory |
|
146 |
* @param propertiesPrefix |
|
147 |
* @return the number of imported annotations |
|
148 |
* @throws CqiClientException |
|
149 |
* @throws CqiServerError |
|
150 |
* @throws IOException |
|
151 |
* @throws XMLStreamException |
|
152 |
*/ |
|
153 |
public static int importAnnotations(MainCorpus mainCorpus, File conlluDirectory, String propertiesPrefix, Boolean normalizeWordIds, |
|
154 |
Set<String> headPropertiesToProject, Set<String> depsPropertiesToProject, Set<String> udPropertiesToImport) throws IOException, CqiServerError, CqiClientException, XMLStreamException { |
|
155 |
|
|
156 |
Log.info(TXMCoreMessages.bind("Importing CONLL-u annotations of {0} in {1} using the ''{2}'' prefix...", conlluDirectory, mainCorpus, propertiesPrefix)); |
|
157 |
|
|
158 |
File[] files = conlluDirectory.listFiles(new FileFilter() { |
|
159 |
@Override |
|
160 |
public boolean accept(File file) { |
|
161 |
return file.isFile() && file.getName().endsWith(".conllu"); |
|
162 |
} |
|
163 |
}); |
|
164 |
|
|
165 |
int nTextProcessed = 0; |
|
166 |
int nWordsInserted = 0; |
|
167 |
for (File coonluFile : files) { |
|
168 |
|
|
169 |
nWordsInserted += ImportCoNLLUAnnotations._importAnnotations(coonluFile, mainCorpus, propertiesPrefix, null, normalizeWordIds, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport); |
|
170 |
nTextProcessed++; |
|
171 |
} |
|
172 |
|
|
173 |
if (nTextProcessed == 0) { |
|
174 |
Log.warning("** No text to process. Aborting."); |
|
175 |
return 0; |
|
176 |
} |
|
177 |
|
|
178 |
if (nWordsInserted == 0) { |
|
179 |
Log.warning("** No annotation imported. Aborting."); |
|
180 |
return 0; |
|
181 |
} |
|
182 |
|
|
183 |
Log.info("XML-TXM source files updated. Updating indexes..."); |
|
184 |
|
|
185 |
UDPreferences.getInstance().setProjectPreferenceValue(mainCorpus.getProject(), UDPreferences.UDPREFIX, propertiesPrefix); |
|
186 |
|
|
187 |
UpdateCorpus.update(mainCorpus); |
|
188 |
|
|
189 |
Log.info("Done."); |
|
190 |
|
|
191 |
return nWordsInserted; |
|
192 |
} |
|
193 |
|
|
194 |
/** |
|
195 |
* |
|
196 |
* if import CoNLLU annotations in the corpus with the same name already exists, it is replaced |
|
197 |
* |
|
198 |
* @param corpus |
|
199 |
* @param conlluFile |
|
200 |
* @param propertiesPrefix |
|
201 |
* @param normalize_word_ids |
|
202 |
* @return the number of imported annotations |
|
203 |
* @throws CqiClientException |
|
204 |
* @throws CqiServerError |
|
205 |
* @throws IOException |
|
206 |
* @throws XMLStreamException |
|
207 |
*/ |
|
208 |
public static int importAnnotationsFromCoNLLUFile(MainCorpus mainCorpus, File conlluFile, String propertiesPrefix, String textId, Boolean normalize_word_ids, |
|
209 |
Set<String> headPropertiesToProject, Set<String> depsPropertiesToProject, Set<String> udPropertiesToImport) throws IOException, |
|
210 |
CqiServerError, CqiClientException, XMLStreamException { |
|
211 |
Log.info(TXMCoreMessages.bind("Importing CONLL-u annotations of {0} in {1} using the ''{2}'' prefix...", conlluFile, mainCorpus, propertiesPrefix)); |
|
212 |
|
|
213 |
int nWordsInserted = ImportCoNLLUAnnotations._importAnnotations(conlluFile, mainCorpus, propertiesPrefix, textId, normalize_word_ids, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport); |
|
214 |
|
|
215 |
if (nWordsInserted == 0) { |
|
216 |
Log.warning("** No annotation imported. Aborting."); |
|
217 |
return 0; |
|
218 |
} |
|
219 |
|
|
220 |
Log.info("XML-TXM source files updated. Updating indexes..."); |
|
221 |
|
|
222 |
UDPreferences.getInstance().setProjectPreferenceValue(mainCorpus.getProject(), UDPreferences.UDPREFIX, propertiesPrefix); |
|
223 |
|
|
224 |
UpdateCorpus.update(mainCorpus); |
|
225 |
|
|
226 |
Log.info("Done."); |
|
227 |
|
|
228 |
return nWordsInserted; |
|
229 |
} |
|
230 |
|
|
231 |
} |
|
0 | 232 |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ImportCoNLLUAnnotationsFromFile.java (revision 3315) | ||
---|---|---|
1 |
// Copyright © 2010-2020 ENS de Lyon., University of Franche-Comté |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.conllu.rcp.commands; |
|
29 |
|
|
30 |
import java.io.File; |
|
31 |
import java.util.Arrays; |
|
32 |
import java.util.HashSet; |
|
33 |
|
|
34 |
import org.eclipse.core.commands.AbstractHandler; |
|
35 |
import org.eclipse.core.commands.ExecutionEvent; |
|
36 |
import org.eclipse.core.commands.ExecutionException; |
|
37 |
import org.eclipse.jface.viewers.IStructuredSelection; |
|
38 |
import org.eclipse.osgi.util.NLS; |
|
39 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
40 |
import org.kohsuke.args4j.Option; |
|
41 |
import org.txm.conllu.core.function.ImportCoNLLUAnnotations; |
|
42 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
43 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
|
44 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
45 |
import org.txm.utils.logger.Log; |
|
46 |
|
|
47 |
/** |
|
48 |
* Import CoNLLU annotations into a TXM corpus |
|
49 |
* |
|
50 |
* IF the corpus already contains CoNLLU annotations, they are replaced |
|
51 |
* |
|
52 |
* @author mdecorde. |
|
53 |
*/ |
|
54 |
public class ImportCoNLLUAnnotationsFromFile extends AbstractHandler { |
|
55 |
|
|
56 |
public static final String ID = ImportCoNLLUAnnotationsFromFile.class.getName(); |
|
57 |
|
|
58 |
@Option(name = "conlluFile", usage = "CoNLL-U file", widget = "FileOpen", required = true, def = "file.conllu") |
|
59 |
File conlluFile; |
|
60 |
|
|
61 |
@Option(name = "textId", usage = "Identifier of the text to update", widget = "String", required = true, def = "text-id") |
|
62 |
String textId; |
|
63 |
|
|
64 |
@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud-") |
|
65 |
String propertiesPrefix; |
|
66 |
|
|
67 |
@Option(name = "udPropertiesToImport", usage = "to create the ud properties", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "form,lemma,upos,xpos,feats,head,deprel,deps,misc") |
|
68 |
String udPropertiesToImport; |
|
69 |
|
|
70 |
@Option(name = "overwrite_cqp_properties", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false") |
|
71 |
Boolean overwrite_cqp_properties; |
|
72 |
|
|
73 |
@Option(name = "normalize_word_ids", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false") |
|
74 |
Boolean normalize_word_ids; |
|
75 |
|
|
76 |
@Option(name = "headPropertiesToProject", usage = "to create the headXYZ properties", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "deprel,upos") |
|
77 |
String headPropertiesToProject; |
|
78 |
|
|
79 |
@Option(name = "depsPropertiesToProject", usage = "to create the depXYZ and outdeprel properties", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "deprel,upos") |
|
80 |
String depsPropertiesToProject; |
|
81 |
|
|
82 |
/* |
|
83 |
* (non-Javadoc) |
|
84 |
* @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent) |
|
85 |
*/ |
|
86 |
@Override |
|
87 |
public Object execute(final ExecutionEvent event) throws ExecutionException { |
|
88 |
|
|
89 |
IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event); |
|
90 |
|
|
91 |
Object s = selection.getFirstElement(); |
|
92 |
if (!(s instanceof MainCorpus)) { |
|
93 |
Log.warning("Selection is not a corpus. Aborting."); |
|
94 |
return null; |
|
95 |
} |
|
96 |
|
|
97 |
if (!ParametersDialog.open(this)) { |
|
98 |
return null; |
|
99 |
} |
|
100 |
if (conlluFile == null || !conlluFile.exists() || !conlluFile.isFile()) { |
|
101 |
Log.warning("Error: cannot access to the conllu file: " + conlluFile); |
|
102 |
return null; |
|
103 |
} |
|
104 |
|
|
105 |
CQPCorpus corpus = (CQPCorpus) s; |
|
106 |
MainCorpus mainCorpus = corpus.getMainCorpus(); |
|
107 |
|
|
108 |
try { |
|
109 |
HashSet<String> test = new HashSet<>(); |
|
110 |
for (String p : ImportCoNLLUAnnotations.UD_PROPERTY_NAMES) { |
|
111 |
if (mainCorpus.getProperty(propertiesPrefix + p) != null) { |
|
112 |
test.add(propertiesPrefix + p); |
|
113 |
} |
|
114 |
} |
|
115 |
if (test.size() > 0 && !overwrite_cqp_properties) { |
|
116 |
Log.warning(NLS.bind("Error: can't use the {0} prefix because some properties are already used: {1}", propertiesPrefix, test)); |
|
117 |
return null; |
|
118 |
} |
|
119 |
|
|
120 |
return ImportCoNLLUAnnotationsFromDirectory.importAnnotationsFromCoNLLUFile(mainCorpus, conlluFile, propertiesPrefix, textId, normalize_word_ids, |
|
121 |
new HashSet<String>(Arrays.asList(headPropertiesToProject.split(","))), |
|
122 |
new HashSet<String>(Arrays.asList(depsPropertiesToProject.split(","))), |
|
123 |
new HashSet<String>(Arrays.asList(udPropertiesToImport.split(",")))); |
|
124 |
} |
|
125 |
catch (Exception e) { |
|
126 |
Log.warning(e); |
|
127 |
Log.printStackTrace(e); |
|
128 |
} |
|
129 |
|
|
130 |
return null; |
|
131 |
} |
|
132 |
} |
|
0 | 133 |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/preferences/CoNLLUSearchPreferencePage.java (revision 3315) | ||
---|---|---|
1 |
package org.txm.conllu.rcp.preferences; |
|
2 |
|
|
3 |
import org.eclipse.jface.preference.BooleanFieldEditor; |
|
4 |
import org.eclipse.jface.preference.StringFieldEditor; |
|
5 |
import org.eclipse.ui.IWorkbench; |
|
6 |
import org.txm.conllu.core.preferences.UDPreferences; |
|
7 |
import org.txm.rcp.IImageKeys; |
|
8 |
import org.txm.rcp.preferences.TXMPreferencePage; |
|
9 |
import org.txm.rcp.preferences.TXMPreferenceStore; |
|
10 |
|
|
11 |
/** |
|
12 |
* UD preferences page |
|
13 |
* |
|
14 |
* @author mdecorde |
|
15 |
* |
|
16 |
*/ |
|
17 |
public class CoNLLUSearchPreferencePage extends TXMPreferencePage { |
|
18 |
|
|
19 |
@Override |
|
20 |
public void createFieldEditors() { |
|
21 |
this.addField(new BooleanFieldEditor(UDPreferences.IMPORT_USE_NEW_DOC_ID, "Use new odc id when importing CoNLL-U files", this.getFieldEditorParent())); |
|
22 |
this.addField(new BooleanFieldEditor(UDPreferences.KEEP_CONTRACTIONS, "Keep word contractions when importing CoNLL-U files", this.getFieldEditorParent())); |
|
23 |
this.addField(new StringFieldEditor(UDPreferences.UDPREFIX, "UD properties prefix", this.getFieldEditorParent())); |
|
24 |
this.addField(new StringFieldEditor(UDPreferences.IMPORT_HEAD_TO_PROJECT, "UD head properties to project (comma separated list)", this.getFieldEditorParent())); |
|
25 |
this.addField(new StringFieldEditor(UDPreferences.IMPORT_DEPS_TO_PROJECT, "UD deps properties to project (comma separated list)", this.getFieldEditorParent())); |
|
26 |
//this.addField(new StringFieldEditor(UDPreferences.DEFAULT_TPROPERTY, "Default T property", this.getFieldEditorParent())); |
|
27 |
//this.addField(new StringFieldEditor(UDPreferences.DEFAULT_NTPROPERTY, "Default NT property", this.getFieldEditorParent())); |
|
28 |
} |
|
29 |
|
|
30 |
/* |
|
31 |
* (non-Javadoc) |
|
32 |
* |
|
33 |
* @see |
|
34 |
* org.eclipse.ui.IWorkbenchPreferencePage#init(org.eclipse.ui.IWorkbench) |
|
35 |
*/ |
|
36 |
@Override |
|
37 |
public void init(IWorkbench workbench) { |
|
38 |
this.setPreferenceStore(new TXMPreferenceStore(UDPreferences.getInstance().getPreferencesNodeQualifier())); |
|
39 |
this.setDescription("UD"); |
|
40 |
this.setImageDescriptor(IImageKeys.getImageDescriptor(this.getClass(), "icons/functions/UD.png")); |
|
41 |
} |
|
42 |
} |
|
0 | 43 |
TXM/trunk/org.txm.conllu.rcp/build.properties (revision 3315) | ||
---|---|---|
1 |
source.. = src/ |
|
2 |
output.. = bin/ |
|
3 |
bin.includes = META-INF/,\ |
|
4 |
.,\ |
|
5 |
plugin.xml,\ |
|
6 |
icons/ |
|
0 | 7 |
TXM/trunk/org.txm.conllu.rcp/plugin.xml (revision 3315) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<?eclipse version="3.4"?> |
|
3 |
<plugin> |
|
4 |
<extension |
|
5 |
point="org.eclipse.ui.commands"> |
|
6 |
<command |
|
7 |
categoryId="TreeSearch4TXM.commands.category" |
|
8 |
defaultHandler="org.txm.conllu.rcp.commands.ImportCoNLLUAnnotationsFromDirectory" |
|
9 |
id="org.txm.conllu.rcp.commands.ImportCoNLLUAnnotationsFromDirectory" |
|
10 |
name="Import CONLL-u Annotations from directory..."> |
|
11 |
</command> |
|
12 |
<command |
|
13 |
categoryId="TreeSearch4TXM.commands.category" |
|
14 |
defaultHandler="org.txm.conllu.rcp.commands.ExportCorpusAsCoNLLU" |
|
15 |
id="org.txm.conllu.rcp.commands.ExportCorpusAsCoNLLU" |
|
16 |
name="Export CONLL-u Annotations..."> |
|
17 |
</command> |
|
18 |
<command |
|
19 |
categoryId="TreeSearch4TXM.commands.category" |
|
20 |
defaultHandler="org.txm.conllu.rcp.commands.ImportCoNLLUAnnotationsFromFile" |
|
21 |
id="org.txm.conllu.rcp.commands.ImportCoNLLUAnnotationsFromFile" |
|
22 |
name="Import CONLL-u Annotations from file..."> |
|
23 |
</command> |
|
24 |
</extension> |
|
25 |
<extension |
|
26 |
point="org.eclipse.ui.menus"> |
|
27 |
<menuContribution |
|
28 |
locationURI="menu:menu.file.import?before=menu.file.import.separator.software"> |
|
29 |
<command |
|
30 |
commandId="org.txm.rcp.handlers.scripts.ExecuteImportScript" |
|
31 |
icon="icons/functions/UD.png" |
|
32 |
label="CoNLL-U + CSV" |
|
33 |
style="push"> |
|
34 |
<parameter |
|
35 |
name="org.txm.rcp.commands.commandParameter3" |
|
36 |
value="conllu/conlluLoader.groovy"> |
|
37 |
</parameter> |
|
38 |
</command> |
|
39 |
</menuContribution> |
|
40 |
<menuContribution |
|
41 |
locationURI="menu:menu.file.export"> |
|
42 |
<command |
|
43 |
commandId="org.txm.conllu.rcp.commands.ExportCorpusAsCoNLLU" |
|
44 |
icon="icons/functions/UD.png" |
|
45 |
label="Export corpus as CONLL-U..." |
|
46 |
style="push"> |
|
47 |
<visibleWhen |
|
48 |
checkEnabled="false"> |
|
49 |
<reference |
|
50 |
definitionId="OneMainCorpusSelected"> |
|
51 |
</reference> |
|
52 |
</visibleWhen> |
|
53 |
</command> |
|
54 |
</menuContribution> |
|
55 |
<menuContribution |
|
56 |
locationURI="menu:menu.corpus.import"> |
|
57 |
<command |
|
58 |
commandId="org.txm.conllu.rcp.commands.ImportCoNLLUAnnotationsFromDirectory" |
|
59 |
icon="icons/functions/UDplus.png" |
|
60 |
style="push"> |
|
61 |
<visibleWhen |
|
62 |
checkEnabled="false"> |
|
63 |
<or> |
|
64 |
<test |
|
65 |
forcePluginActivation="true" |
|
66 |
property="org.txm.rcp.testers.TreeSearchReady" |
|
67 |
value="TreeSearchReady"> |
|
68 |
</test> |
|
69 |
<reference |
|
70 |
definitionId="OneMainCorpusSelected"> |
|
71 |
</reference> |
|
72 |
</or> |
|
73 |
</visibleWhen> |
|
74 |
</command> |
|
75 |
<command |
|
76 |
commandId="org.txm.conllu.rcp.commands.ImportCoNLLUAnnotationsFromFile" |
|
77 |
icon="icons/functions/UDplus.png" |
|
78 |
style="push"> |
|
79 |
<visibleWhen |
|
80 |
checkEnabled="false"> |
|
81 |
<or> |
|
82 |
<test |
|
83 |
forcePluginActivation="true" |
|
84 |
property="org.txm.rcp.testers.TreeSearchReady" |
|
85 |
value="TreeSearchReady"> |
|
86 |
</test> |
|
87 |
<reference |
|
88 |
definitionId="OneMainCorpusSelected"> |
|
89 |
</reference> |
|
90 |
</or> |
|
91 |
</visibleWhen> |
|
92 |
</command> |
|
93 |
</menuContribution> |
|
94 |
</extension> |
|
95 |
<extension |
|
96 |
point="org.eclipse.ui.preferencePages"> |
|
97 |
<page |
|
98 |
category="org.txm.treesearch.preferences.TreeSearchPreferencePage" |
|
99 |
class="org.txm.conllu.rcp.preferences.CoNLLUSearchPreferencePage" |
|
100 |
id="org.txm.conllu.rcp.preferences.CoNLLUSearchPreferencePage" |
|
101 |
name="UD"> |
|
102 |
</page> |
|
103 |
</extension> |
|
104 |
<extension |
|
105 |
point="org.txm.rcp.importsection"> |
|
106 |
<importsectionconfiguration |
|
107 |
class="org.txm.conllu.rcp.importsection.CoNLLUAnnotationSectionConfigurator"> |
|
108 |
</importsectionconfiguration> |
|
109 |
</extension> |
|
110 |
|
|
111 |
</plugin> |
|
0 | 112 |
Formats disponibles : Unified diff