Révision 3561
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ImportCoNLLUAnnotationsFromFile.java (revision 3561) | ||
---|---|---|
64 | 64 |
@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud-") |
65 | 65 |
String propertiesPrefix; |
66 | 66 |
|
67 |
@Option(name = "udPropertiesToImport", usage = "to create the ud properties", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "form,lemma,upos,xpos,feats,head,deprel,deps,misc") |
|
67 |
@Option(name = "udPropertiesToImport", usage = "to create the ud properties", widget = "StringArrayMultiple", metaVar="id form lemma upos xpos feats head deprel deps misc", required = true, def = "form,lemma,upos,xpos,feats,head,deprel,deps,misc")
|
|
68 | 68 |
String udPropertiesToImport; |
69 | 69 |
|
70 | 70 |
@Option(name = "overwrite_cqp_properties", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false") |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ImportCoNLLUAnnotationsFromDirectory.java (revision 3561) | ||
---|---|---|
72 | 72 |
@Option(name = "propertiesPrefix", usage = "optional prefix for the properties to create", widget = "String", required = true, def = "ud-") |
73 | 73 |
String propertiesPrefix; |
74 | 74 |
|
75 |
@Option(name = "udPropertiesToImport", usage = "to create the ud properties", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "form,lemma,upos,xpos,feats,head,deprel,deps,misc") |
|
75 |
@Option(name = "udPropertiesToImport", usage = "to create the ud properties", widget = "StringArrayMultiple", metaVar="id form lemma upos xpos feats head deprel deps misc", required = true, def = "form,lemma,upos,xpos,feats,head,deprel,deps,misc")
|
|
76 | 76 |
String udPropertiesToImport; |
77 | 77 |
|
78 | 78 |
@Option(name = "overwrite_cqp_properties", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false") |
... | ... | |
166 | 166 |
int nTextProcessed = 0; |
167 | 167 |
int nWordsInserted = 0; |
168 | 168 |
ConsoleProgressBar cpb = new ConsoleProgressBar(files.length); |
169 |
|
|
170 |
if (files.length == 0) { |
|
171 |
Log.warning(NLS.bind("** No *.conllu files found in {0}. Aborting.", conlluDirectory)); |
|
172 |
return 0; |
|
173 |
} |
|
174 |
|
|
169 | 175 |
for (File coonluFile : files) { |
170 | 176 |
cpb.tick(); |
171 | 177 |
nWordsInserted += ImportCoNLLUAnnotations._importAnnotations(coonluFile, mainCorpus, propertiesPrefix, null, normalizeWordIds, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport); |
TXM/trunk/org.txm.rcp/rcpapplication.product (revision 3561) | ||
---|---|---|
80 | 80 |
</launcher> |
81 | 81 |
|
82 | 82 |
<vm> |
83 |
<linux include="false">org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7</linux>
|
|
84 |
<macos include="false">org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7</macos>
|
|
83 |
<linux include="false">org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8</linux>
|
|
84 |
<macos include="false">org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8</macos>
|
|
85 | 85 |
<solaris include="false">org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7</solaris> |
86 | 86 |
<windows include="false">org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8</windows> |
87 | 87 |
</vm> |
TXM/trunk/org.txm.connlu.core/src/org/txm/conllu/core/function/ImportCoNLLUAnnotations.java (revision 3561) | ||
---|---|---|
30 | 30 |
textId = coonluFile.getName().substring(0, coonluFile.getName().length() - 7); |
31 | 31 |
} |
32 | 32 |
|
33 |
for (String p : headPropertiesToProject) { |
|
34 |
udPropertiesToImport.add("head-"+p); |
|
35 |
} |
|
36 |
|
|
37 |
for (String p : depsPropertiesToProject) { |
|
38 |
udPropertiesToImport.add("dep-"+p); |
|
39 |
} |
|
40 |
|
|
33 | 41 |
Log.info("** processing text: " + textId); |
34 | 42 |
Text text = mainCorpus.getProject().getText(textId); |
35 | 43 |
if (text == null) { |
... | ... | |
181 | 189 |
|
182 | 190 |
return processor.getNInsertions(); |
183 | 191 |
} |
184 |
|
|
192 |
|
|
185 | 193 |
public static void buildPropertiesProjections(LinkedHashMap<String, HashMap<String, String>> sentenceProperties, Set<String> headPropertiesToProject, Set<String> depsPropertiesToProject) throws IOException, XMLStreamException { |
186 | 194 |
|
187 | 195 |
//System.out.println("Building "+headPropertiesToProject+" and "+depsPropertiesToProject+" in "+sentenceProperties); |
... | ... | |
194 | 202 |
for (String wheadid : sentenceProperties.keySet()) { // find the word head properties |
195 | 203 |
|
196 | 204 |
HashMap<String, String> headProperties = sentenceProperties.get(wheadid); |
197 |
|
|
205 |
|
|
198 | 206 |
if (headProperties.get("id").equals(properties.get("head"))) { // a word whose head is the current word id |
199 | 207 |
// { "form", "lemma", "upos", "xpos", "feats", "deprel", "deps", "misc" }; |
200 | 208 |
if (headPropertiesToProject.contains("form")) properties.put("head-form", headProperties.get("form")); |
... | ... | |
259 | 267 |
HashMap<String, String> properties = sentenceProperties.get(id); |
260 | 268 |
ArrayList<String> propertyNames = new ArrayList<String>(properties.keySet()); |
261 | 269 |
|
262 |
for (String p : propertyNames) { |
|
270 |
for (String p : propertyNames) { // add # to properties to inject, and remove properties not to inject
|
|
263 | 271 |
|
264 |
if (udPropertiesToImport != null && udPropertiesToImport.size() > 0 && !udPropertiesToImport.contains(p)) continue; // keep only the sentence properties to import |
|
265 |
|
|
266 |
if (p.startsWith("#")) continue; //property already prefixed with #, do not add the prefix |
|
267 |
|
|
272 |
if (p.startsWith("#")) { |
|
273 |
continue; //property already prefixed with #, do not add the prefix |
|
274 |
} |
|
275 |
else if (udPropertiesToImport != null && udPropertiesToImport.size() > 0 && !udPropertiesToImport.contains(p)) {; // keep only the sentence properties to import |
|
276 |
properties.remove(p); |
|
277 |
continue; // property is not in the list to import: it has just been removed, so there is nothing to prefix
|
278 |
} |
|
268 | 279 |
String value = properties.get(p); |
269 | 280 |
properties.remove(p); |
270 | 281 |
properties.put("#"+propertiesPrefix+p, value); |
271 | 282 |
} |
283 |
|
|
272 | 284 |
processor.addProperty(id, properties); |
273 | 285 |
} |
274 | 286 |
|
Formats disponibles : Unified diff