Révision 3685
TXM/trunk/bundles/org.txm.conllu.rcp/OSGI-INF/l10n/bundle.properties (revision 3685) | ||
---|---|---|
4 | 4 |
command.label = CoNLL-U + CSV |
5 | 5 |
command.label.0 = Corpus in CoNLL-U format (.conllu)... |
6 | 6 |
command.name = from directory of CoNLL-U files (.conllu) ... |
7 |
command.name.0 = in CoNLL-U files (.conllu)...
|
|
7 |
command.name.0 = in CoNLL-U format (.conllu)...
|
|
8 | 8 |
command.name.1 = from a CoNLL-U file (.conllu)... |
9 | 9 |
|
10 | 10 |
page.name = UD |
TXM/trunk/bundles/org.txm.conllu.rcp/OSGI-INF/l10n/bundle_fr.properties (revision 3685) | ||
---|---|---|
4 | 4 |
command.label = CoNLL-U + CSV |
5 | 5 |
command.label.0 = Corpus au format CoNLL-U (.conllu)... |
6 | 6 |
command.name = depuis un r\u00E9pertoire de fichiers CoNLL-U (.conllu)... |
7 |
command.name.0 = vers des fichiers CoNLL-U (.conllu)...
|
|
7 |
command.name.0 = au format CoNLL-U (.conllu)...
|
|
8 | 8 |
command.name.1 = depuis un fichier CONLL-U... |
9 | 9 |
|
10 | 10 |
page.name = UD |
TXM/trunk/bundles/org.txm.conllu.rcp/plugin.xml (revision 3685) | ||
---|---|---|
25 | 25 |
categoryId="TreeSearch4TXM.commands.category" |
26 | 26 |
defaultHandler="org.txm.conllu.rcp.commands.ExportCorpusAsFullCoNLLU" |
27 | 27 |
id="org.txm.conllu.rcp.commands.ExportCorpusAsFullCoNLLU" |
28 |
name="in CoNLL-U format (.conllu)...">
|
|
28 |
name="%command.name.0">
|
|
29 | 29 |
</command> |
30 |
<command |
|
31 |
categoryId="TreeSearch4TXM.commands.category" |
|
32 |
defaultHandler="org.txm.conllu.rcp.commands.CoNLLUCorpusPreferences" |
|
33 |
id="org.txm.conllu.rcp.commands.CoNLLUCorpusPreferences" |
|
34 |
name="CoNLLU Corpus preferences"> |
|
35 |
</command> |
|
30 | 36 |
</extension> |
31 | 37 |
<extension |
32 | 38 |
point="org.eclipse.ui.menus"> |
... | ... | |
56 | 62 |
</reference> |
57 | 63 |
</visibleWhen> |
58 | 64 |
</command> |
59 |
<command |
|
60 |
commandId="org.txm.conllu.rcp.commands.ExportCorpusAsFullCoNLLU" |
|
61 |
icon="icons/functions/UD.png" |
|
62 |
style="push"> |
|
63 |
<visibleWhen |
|
64 |
checkEnabled="false"> |
|
65 |
<reference |
|
66 |
definitionId="OneMainCorpusSelected"> |
|
67 |
</reference> |
|
68 |
</visibleWhen> |
|
69 |
</command> |
|
70 | 65 |
</menuContribution> |
71 | 66 |
<menuContribution |
72 | 67 |
locationURI="menu:menu.file.import.annotations"> |
... | ... | |
107 | 102 |
</visibleWhen> |
108 | 103 |
</command> |
109 | 104 |
</menuContribution> |
105 |
<menuContribution |
|
106 |
locationURI="menu:menu.file.export.properties"> |
|
107 |
<command |
|
108 |
commandId="org.txm.conllu.rcp.commands.ExportCorpusAsFullCoNLLU" |
|
109 |
icon="icons/functions/UD.png" |
|
110 |
style="push"> |
|
111 |
<visibleWhen |
|
112 |
checkEnabled="false"> |
|
113 |
<reference |
|
114 |
definitionId="OneMainCorpusSelected"> |
|
115 |
</reference> |
|
116 |
</visibleWhen> |
|
117 |
</command> |
|
118 |
</menuContribution> |
|
119 |
<menuContribution |
|
120 |
locationURI="menu:menu.edit?after=org.txm.rcp.separator1"> |
|
121 |
<command |
|
122 |
commandId="org.txm.conllu.rcp.commands.CoNLLUCorpusPreferences" |
|
123 |
style="push"> |
|
124 |
<visibleWhen |
|
125 |
checkEnabled="false"> |
|
126 |
<reference |
|
127 |
definitionId="OneMainCorpusSelected"> |
|
128 |
</reference> |
|
129 |
</visibleWhen> |
|
130 |
</command> |
|
131 |
</menuContribution> |
|
110 | 132 |
</extension> |
111 | 133 |
<extension |
112 | 134 |
point="org.eclipse.ui.preferencePages"> |
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ToCoNLL2009.java (revision 3685) | ||
---|---|---|
19 | 19 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
20 | 20 |
import org.txm.utils.ConsoleProgressBar; |
21 | 21 |
|
22 |
import jline.internal.Log; |
|
23 |
|
|
22 | 24 |
public class ToCoNLL2009 { |
23 | 25 |
|
24 | 26 |
boolean debug = false; |
... | ... | |
48 | 50 |
WordProperty idProperty = corpus.getProperty("id"); |
49 | 51 |
|
50 | 52 |
ConsoleProgressBar cpb = new ConsoleProgressBar(texts.length); |
51 |
|
|
53 |
Log.info("Writing CoNLLu files in "+outdir.getAbsolutePath()); |
|
54 |
int nTokens = 0; |
|
52 | 55 |
for (String text : texts) { |
53 | 56 |
|
54 | 57 |
cpb.tick(); |
... | ... | |
67 | 70 |
} |
68 | 71 |
|
69 | 72 |
String[] words = CorpusManager.getCorpusManager().getCqiClient().cpos2Str(word.getQualifiedName(), positions); |
70 |
|
|
73 |
nTokens += words.length; |
|
71 | 74 |
String[] idsList = CorpusManager.getCorpusManager().getCqiClient().cpos2Str(idProperty.getQualifiedName(), positions); |
72 | 75 |
|
73 | 76 |
String[] lemmes = null; |
... | ... | |
120 | 123 |
} |
121 | 124 |
|
122 | 125 |
cpb.done(); |
126 |
Log.info("Done: "+texts.length+" texts and "+nTokens+" words."); |
|
123 | 127 |
return true; |
124 | 128 |
} |
125 | 129 |
|
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/CoNLLUCorpusPreferences.java (revision 3685) | ||
---|---|---|
1 |
// Copyright © 2010-2020 ENS de Lyon., University of Franche-Comté |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.conllu.rcp.commands; |
|
29 |
|
|
30 |
import java.util.HashMap; |
|
31 |
|
|
32 |
import org.eclipse.core.commands.AbstractHandler; |
|
33 |
import org.eclipse.core.commands.ExecutionEvent; |
|
34 |
import org.eclipse.core.commands.ExecutionException; |
|
35 |
import org.eclipse.jface.dialogs.InputDialog; |
|
36 |
import org.eclipse.jface.dialogs.MessageDialog; |
|
37 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
38 |
import org.kohsuke.args4j.Option; |
|
39 |
import org.osgi.service.prefs.Preferences; |
|
40 |
import org.txm.conllu.core.preferences.UDPreferences; |
|
41 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
42 |
import org.txm.rcp.views.corpora.CorporaView; |
|
43 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
44 |
import org.txm.utils.logger.Log; |
|
45 |
|
|
46 |
/** |
|
47 |
* Opens an input dialog to edit the prefix of the CQP properties encoding the UD fields of the selected main corpus, and stores the value in the preferences node of the corpus project |
|
48 |
* |
|
49 |
* @author mdecorde. |
|
50 |
*/ |
|
51 |
public class CoNLLUCorpusPreferences extends AbstractHandler { |
|
52 |
|
|
53 |
public static final String ID = CoNLLUCorpusPreferences.class.getName(); |
|
54 |
|
|
55 |
|
|
56 |
/* |
|
57 |
* (non-Javadoc) |
|
58 |
* @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent) |
|
59 |
*/ |
|
60 |
@Override |
|
61 |
public Object execute(final ExecutionEvent event) throws ExecutionException { |
|
62 |
|
|
63 |
Object s = CorporaView.getFirstSelectedObject(); |
|
64 |
|
|
65 |
if (!(s instanceof MainCorpus)) { |
|
66 |
Log.warning("Selection is not a corpus. Aborting."); |
|
67 |
return null; |
|
68 |
} |
|
69 |
|
|
70 |
MainCorpus c = (MainCorpus)s; |
|
71 |
|
|
72 |
Preferences customNode = c.getProject().getPreferencesScope().getNode(UDPreferences.getInstance().getPreferencesNodeQualifier()); |
|
73 |
String udPrefix = customNode.get(UDPreferences.UDPREFIX, UDPreferences.getInstance().getString(UDPreferences.UDPREFIX)); |
|
74 |
|
|
75 |
InputDialog dialog = new InputDialog(HandlerUtil.getActiveShell(event), "CoNLL-U Corpus preferences", "Prefix of the CQP properties encoding the UD fields", udPrefix, null); |
|
76 |
|
|
77 |
if (dialog.open() == InputDialog.OK) { |
|
78 |
|
|
79 |
customNode.put(UDPreferences.UDPREFIX, dialog.getValue()); |
|
80 |
Log.info("Set "+c.getName()+" prefix to "+dialog.getValue()); |
|
81 |
return s; |
|
82 |
} |
|
83 |
return null; |
|
84 |
} |
|
85 |
} |
|
86 |
|
|
87 |
|
|
0 | 88 |
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ExportCorpusAsCoNLLU.java (revision 3685) | ||
---|---|---|
114 | 114 |
CQPCorpus corpus = (CQPCorpus) s; |
115 | 115 |
MainCorpus mainCorpus = corpus.getMainCorpus(); |
116 | 116 |
|
117 |
return exportCorpus(mainCorpus, outputDirectory, sentenceStructure, lemmaProperty, posProperty, encoding); |
|
118 |
|
|
119 |
} |
|
120 |
|
|
121 |
public static boolean exportCorpus(MainCorpus mainCorpus, File outputDirectory, String sentenceStructure, String lemmaProperty, String posProperty, String encoding) { |
|
117 | 122 |
try { |
118 |
new ToCoNLL2009().process(outputDirectory, mainCorpus, corpus.getStructuralUnit(sentenceStructure), corpus.getProperty("word"), corpus.getProperty(lemmaProperty), corpus.getProperty(posProperty), encoding); |
|
123 |
return new ToCoNLL2009().process(outputDirectory, mainCorpus, mainCorpus.getStructuralUnit(sentenceStructure), mainCorpus.getProperty("word"), mainCorpus.getProperty(lemmaProperty), mainCorpus.getProperty(posProperty), encoding); |
|
124 |
|
|
119 | 125 |
} catch (Exception e) { |
120 | 126 |
Log.warning(e); |
121 | 127 |
Log.printStackTrace(e); |
122 | 128 |
} |
123 |
|
|
124 |
return null; |
|
129 |
return false; |
|
125 | 130 |
} |
126 |
|
|
131 |
|
|
127 | 132 |
/** |
128 | 133 |
* export the corpus in a directory of conllu files (one per text) |
129 | 134 |
* |
Formats disponibles : Unified diff