Révision 3685
| TXM/trunk/bundles/org.txm.conllu.rcp/OSGI-INF/l10n/bundle.properties (revision 3685) | ||
|---|---|---|
| 4 | 4 |
command.label = CoNLL-U + CSV |
| 5 | 5 |
command.label.0 = Corpus in CoNLL-U format (.conllu)... |
| 6 | 6 |
command.name = from directory of CoNLL-U files (.conllu) ... |
| 7 |
command.name.0 = in CoNLL-U files (.conllu)...
|
|
| 7 |
command.name.0 = in CoNLL-U format (.conllu)...
|
|
| 8 | 8 |
command.name.1 = from a CoNLL-U file (.conllu)... |
| 9 | 9 |
|
| 10 | 10 |
page.name = UD |
| TXM/trunk/bundles/org.txm.conllu.rcp/OSGI-INF/l10n/bundle_fr.properties (revision 3685) | ||
|---|---|---|
| 4 | 4 |
command.label = CoNLL-U + CSV |
| 5 | 5 |
command.label.0 = Corpus au format CoNLL-U (.conllu)... |
| 6 | 6 |
command.name = depuis un r\u00E9pertoire de fichiers CoNLL-U (.conllu)... |
| 7 |
command.name.0 = vers des fichiers CoNLL-U (.conllu)...
|
|
| 7 |
command.name.0 = au format CoNLL-U (.conllu)...
|
|
| 8 | 8 |
command.name.1 = depuis un fichier CoNLL-U (.conllu)... |
| 9 | 9 |
|
| 10 | 10 |
page.name = UD |
| TXM/trunk/bundles/org.txm.conllu.rcp/plugin.xml (revision 3685) | ||
|---|---|---|
| 25 | 25 |
categoryId="TreeSearch4TXM.commands.category" |
| 26 | 26 |
defaultHandler="org.txm.conllu.rcp.commands.ExportCorpusAsFullCoNLLU" |
| 27 | 27 |
id="org.txm.conllu.rcp.commands.ExportCorpusAsFullCoNLLU" |
| 28 |
name="in CoNLL-U format (.conllu)...">
|
|
| 28 |
name="%command.name.0">
|
|
| 29 | 29 |
</command> |
| 30 |
<command |
|
| 31 |
categoryId="TreeSearch4TXM.commands.category" |
|
| 32 |
defaultHandler="org.txm.conllu.rcp.commands.CoNLLUCorpusPreferences" |
|
| 33 |
id="org.txm.conllu.rcp.commands.CoNLLUCorpusPreferences" |
|
| 34 |
name="CoNLLU Corpus preferences"> |
|
| 35 |
</command> |
|
| 30 | 36 |
</extension> |
| 31 | 37 |
<extension |
| 32 | 38 |
point="org.eclipse.ui.menus"> |
| ... | ... | |
| 56 | 62 |
</reference> |
| 57 | 63 |
</visibleWhen> |
| 58 | 64 |
</command> |
| 59 |
<command |
|
| 60 |
commandId="org.txm.conllu.rcp.commands.ExportCorpusAsFullCoNLLU" |
|
| 61 |
icon="icons/functions/UD.png" |
|
| 62 |
style="push"> |
|
| 63 |
<visibleWhen |
|
| 64 |
checkEnabled="false"> |
|
| 65 |
<reference |
|
| 66 |
definitionId="OneMainCorpusSelected"> |
|
| 67 |
</reference> |
|
| 68 |
</visibleWhen> |
|
| 69 |
</command> |
|
| 70 | 65 |
</menuContribution> |
| 71 | 66 |
<menuContribution |
| 72 | 67 |
locationURI="menu:menu.file.import.annotations"> |
| ... | ... | |
| 107 | 102 |
</visibleWhen> |
| 108 | 103 |
</command> |
| 109 | 104 |
</menuContribution> |
| 105 |
<menuContribution |
|
| 106 |
locationURI="menu:menu.file.export.properties"> |
|
| 107 |
<command |
|
| 108 |
commandId="org.txm.conllu.rcp.commands.ExportCorpusAsFullCoNLLU" |
|
| 109 |
icon="icons/functions/UD.png" |
|
| 110 |
style="push"> |
|
| 111 |
<visibleWhen |
|
| 112 |
checkEnabled="false"> |
|
| 113 |
<reference |
|
| 114 |
definitionId="OneMainCorpusSelected"> |
|
| 115 |
</reference> |
|
| 116 |
</visibleWhen> |
|
| 117 |
</command> |
|
| 118 |
</menuContribution> |
|
| 119 |
<menuContribution |
|
| 120 |
locationURI="menu:menu.edit?after=org.txm.rcp.separator1"> |
|
| 121 |
<command |
|
| 122 |
commandId="org.txm.conllu.rcp.commands.CoNLLUCorpusPreferences" |
|
| 123 |
style="push"> |
|
| 124 |
<visibleWhen |
|
| 125 |
checkEnabled="false"> |
|
| 126 |
<reference |
|
| 127 |
definitionId="OneMainCorpusSelected"> |
|
| 128 |
</reference> |
|
| 129 |
</visibleWhen> |
|
| 130 |
</command> |
|
| 131 |
</menuContribution> |
|
| 110 | 132 |
</extension> |
| 111 | 133 |
<extension |
| 112 | 134 |
point="org.eclipse.ui.preferencePages"> |
| TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ToCoNLL2009.java (revision 3685) | ||
|---|---|---|
| 19 | 19 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
| 20 | 20 |
import org.txm.utils.ConsoleProgressBar; |
| 21 | 21 |
|
| 22 |
import jline.internal.Log; |
|
| 23 |
|
|
| 22 | 24 |
public class ToCoNLL2009 {
|
| 23 | 25 |
|
| 24 | 26 |
boolean debug = false; |
| ... | ... | |
| 48 | 50 |
WordProperty idProperty = corpus.getProperty("id");
|
| 49 | 51 |
|
| 50 | 52 |
ConsoleProgressBar cpb = new ConsoleProgressBar(texts.length); |
| 51 |
|
|
| 53 |
Log.info("Writing CoNLLu files in "+outdir.getAbsolutePath());
|
|
| 54 |
int nTokens = 0; |
|
| 52 | 55 |
for (String text : texts) {
|
| 53 | 56 |
|
| 54 | 57 |
cpb.tick(); |
| ... | ... | |
| 67 | 70 |
} |
| 68 | 71 |
|
| 69 | 72 |
String[] words = CorpusManager.getCorpusManager().getCqiClient().cpos2Str(word.getQualifiedName(), positions); |
| 70 |
|
|
| 73 |
nTokens += words.length; |
|
| 71 | 74 |
String[] idsList = CorpusManager.getCorpusManager().getCqiClient().cpos2Str(idProperty.getQualifiedName(), positions); |
| 72 | 75 |
|
| 73 | 76 |
String[] lemmes = null; |
| ... | ... | |
| 120 | 123 |
} |
| 121 | 124 |
|
| 122 | 125 |
cpb.done(); |
| 126 |
Log.info("Done: "+texts.length+" texts and "+nTokens+" words.");
|
|
| 123 | 127 |
return true; |
| 124 | 128 |
} |
| 125 | 129 |
|
| TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/CoNLLUCorpusPreferences.java (revision 3685) | ||
|---|---|---|
| 1 |
// Copyright © 2010-2020 ENS de Lyon., University of Franche-Comté |
|
| 2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
| 3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
| 4 |
// Sophia Antipolis, University of Paris 3. |
|
| 5 |
// |
|
| 6 |
// The TXM platform is free software: you can redistribute it |
|
| 7 |
// and/or modify it under the terms of the GNU General Public |
|
| 8 |
// License as published by the Free Software Foundation, |
|
| 9 |
// either version 2 of the License, or (at your option) any |
|
| 10 |
// later version. |
|
| 11 |
// |
|
| 12 |
// The TXM platform is distributed in the hope that it will be |
|
| 13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
| 14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
| 15 |
// PURPOSE. See the GNU General Public License for more |
|
| 16 |
// details. |
|
| 17 |
// |
|
| 18 |
// You should have received a copy of the GNU General |
|
| 19 |
// Public License along with the TXM platform. If not, see |
|
| 20 |
// http://www.gnu.org/licenses. |
|
| 21 |
// |
|
| 22 |
// |
|
| 23 |
// |
|
| 24 |
// $LastChangedDate:$ |
|
| 25 |
// $LastChangedRevision:$ |
|
| 26 |
// $LastChangedBy:$ |
|
| 27 |
// |
|
| 28 |
package org.txm.conllu.rcp.commands; |
|
| 29 |
|
|
| 30 |
import java.util.HashMap; |
|
| 31 |
|
|
| 32 |
import org.eclipse.core.commands.AbstractHandler; |
|
| 33 |
import org.eclipse.core.commands.ExecutionEvent; |
|
| 34 |
import org.eclipse.core.commands.ExecutionException; |
|
| 35 |
import org.eclipse.jface.dialogs.InputDialog; |
|
| 36 |
import org.eclipse.jface.dialogs.MessageDialog; |
|
| 37 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
| 38 |
import org.kohsuke.args4j.Option; |
|
| 39 |
import org.osgi.service.prefs.Preferences; |
|
| 40 |
import org.txm.conllu.core.preferences.UDPreferences; |
|
| 41 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
| 42 |
import org.txm.rcp.views.corpora.CorporaView; |
|
| 43 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 44 |
import org.txm.utils.logger.Log; |
|
| 45 |
|
|
| 46 |
/** |

| 47 |
* Command handler that lets the user edit, through an input dialog, the prefix of the CQP properties encoding the UD fields of the selected main corpus, and stores the value in the UD preferences node of the corpus project. |

| 48 |
* |

| 49 |
* @author mdecorde. |

| 50 |
*/ |

| 51 |
public class CoNLLUCorpusPreferences extends AbstractHandler {
|

| 52 |


| 53 |
// Handler identifier: the fully qualified class name.
public static final String ID = CoNLLUCorpusPreferences.class.getName(); |

| 54 |


| 55 |


| 56 |
/* |

| 57 |
* (non-Javadoc) |

| 58 |
* @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent) |

| 59 |
*/ |

| 60 |
@Override |

| 61 |
public Object execute(final ExecutionEvent event) throws ExecutionException {
|

| 62 |


| 63 |
Object s = CorporaView.getFirstSelectedObject(); |

| 64 |


| 65 |
// Only a MainCorpus selection is accepted; anything else aborts with a warning.
if (!(s instanceof MainCorpus)) {
|

| 66 |
Log.warning("Selection is not a corpus. Aborting.");
|

| 67 |
return null; |

| 68 |
} |

| 69 |


| 70 |
MainCorpus c = (MainCorpus)s; |

| 71 |


| 72 |
// UD preferences node taken from the preferences scope of the corpus project.
Preferences customNode = c.getProject().getPreferencesScope().getNode(UDPreferences.getInstance().getPreferencesNodeQualifier()); |

| 73 |
// Current prefix; the UDPreferences value is used as default when the project node has no entry.
String udPrefix = customNode.get(UDPreferences.UDPREFIX, UDPreferences.getInstance().getString(UDPreferences.UDPREFIX)); |

| 74 |


| 75 |
// The dialog is pre-filled with the current prefix; no input validator is installed (last argument is null).
InputDialog dialog = new InputDialog(HandlerUtil.getActiveShell(event), "CoNLL-U Corpus preferences", "Prefix of the CQP properties encoding the UD fields", udPrefix, null); |

| 76 |


| 77 |
if (dialog.open() == InputDialog.OK) {
|

| 78 |


| 79 |
// NOTE(review): the node is not flushed here - confirm the project preferences are persisted elsewhere.
customNode.put(UDPreferences.UDPREFIX, dialog.getValue()); |

| 80 |
Log.info("Set "+c.getName()+" prefix to "+dialog.getValue());
|

| 81 |
// The selected corpus is returned when the prefix was set.
return s; |

| 82 |
} |

| 83 |
// Dialog was not confirmed: nothing is changed.
return null; |

| 84 |
} |

| 85 |
} |
|
| 86 |
|
|
| 87 |
|
|
| 0 | 88 | |
| TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ExportCorpusAsCoNLLU.java (revision 3685) | ||
|---|---|---|
| 114 | 114 |
CQPCorpus corpus = (CQPCorpus) s; |
| 115 | 115 |
MainCorpus mainCorpus = corpus.getMainCorpus(); |
| 116 | 116 |
|
| 117 |
return exportCorpus(mainCorpus, outputDirectory, sentenceStructure, lemmaProperty, posProperty, encoding); |
|
| 118 |
|
|
| 119 |
} |
|
| 120 |
|
|
| 121 |
public static boolean exportCorpus(MainCorpus mainCorpus, File outputDirectory, String sentenceStructure, String lemmaProperty, String posProperty, String encoding) {
|
|
| 117 | 122 |
try {
|
| 118 |
new ToCoNLL2009().process(outputDirectory, mainCorpus, corpus.getStructuralUnit(sentenceStructure), corpus.getProperty("word"), corpus.getProperty(lemmaProperty), corpus.getProperty(posProperty), encoding);
|
|
| 123 |
return new ToCoNLL2009().process(outputDirectory, mainCorpus, mainCorpus.getStructuralUnit(sentenceStructure), mainCorpus.getProperty("word"), mainCorpus.getProperty(lemmaProperty), mainCorpus.getProperty(posProperty), encoding);
|
|
| 124 |
|
|
| 119 | 125 |
} catch (Exception e) {
|
| 120 | 126 |
Log.warning(e); |
| 121 | 127 |
Log.printStackTrace(e); |
| 122 | 128 |
} |
| 123 |
|
|
| 124 |
return null; |
|
| 129 |
return false; |
|
| 125 | 130 |
} |
| 126 |
|
|
| 131 |
|
|
| 127 | 132 |
/** |
| 128 | 133 |
* export the corpus in a directory of conllu files (one per text) |
| 129 | 134 |
* |
Formats disponibles : Unified diff