Révision 3752
TXM/trunk/bundles/org.txm.conllu.core/src/org/txm/conllu/core/function/ImportCoNLLUAnnotations.java (revision 3752) | ||
---|---|---|
24 | 24 |
|
25 | 25 |
public static final String[] UD_PROPERTY_NAMES = { "id", "form", "lemma", "upos", "xpos", "feats", "head", "deprel", "deps", "misc" }; |
26 | 26 |
|
27 |
public static int _importAnnotations(File coonluFile, MainCorpus mainCorpus, String propertiesPrefix, String textId, Boolean normalize_word_ids, Set<String> headPropertiesToProject, Set<String> depsPropertiesToProject, Set<String> udPropertiesToImport) throws IOException, XMLStreamException { |
|
27 |
public static int _importAnnotations(File coonluFile, MainCorpus mainCorpus, |
|
28 |
String propertiesPrefix, String textId, |
|
29 |
Boolean normalize_word_ids, |
|
30 |
Boolean importCommentProperties, |
|
31 |
Set<String> headPropertiesToProject, |
|
32 |
Set<String> depsPropertiesToProject, |
|
33 |
Set<String> udPropertiesToImport) throws IOException, XMLStreamException { |
|
28 | 34 |
|
29 | 35 |
if (textId == null || textId.length() == 0) { // no text id provided, using the conllu file name |
30 | 36 |
textId = coonluFile.getName().substring(0, coonluFile.getName().length() - 7); |
... | ... | |
118 | 124 |
} |
119 | 125 |
|
120 | 126 |
if (sent_id != null) { |
121 |
properties.put("#"+propertiesPrefix+"sentid", sent_id); |
|
127 |
if (importCommentProperties) properties.put("#"+propertiesPrefix+"sentid", sent_id);
|
|
122 | 128 |
sent_id = ""; // reset value for next sentence |
123 | 129 |
} |
124 | 130 |
else { |
125 |
properties.put("#"+propertiesPrefix+"sentid", ""); |
|
131 |
if (importCommentProperties) properties.put("#"+propertiesPrefix+"sentid", "");
|
|
126 | 132 |
} |
127 | 133 |
|
128 | 134 |
if (newdoc_id != null) { |
129 |
properties.put("#"+propertiesPrefix+"newdocid", newdoc_id); |
|
135 |
if (importCommentProperties) properties.put("#"+propertiesPrefix+"newdocid", newdoc_id);
|
|
130 | 136 |
newdoc_id = null; // reset value for next sentence |
131 | 137 |
} |
132 | 138 |
else { |
133 |
properties.put("#"+propertiesPrefix+"newdocid", ""); |
|
139 |
if (importCommentProperties) properties.put("#"+propertiesPrefix+"newdocid", "");
|
|
134 | 140 |
} |
135 | 141 |
|
136 | 142 |
if (newpar_id != null) { |
137 |
properties.put("#"+propertiesPrefix+"newparid", newpar_id); |
|
143 |
if (importCommentProperties) properties.put("#"+propertiesPrefix+"newparid", newpar_id);
|
|
138 | 144 |
newpar_id = null; // reset value for next sentence |
139 | 145 |
} |
140 | 146 |
else { |
141 |
properties.put("#"+propertiesPrefix+"newparid", ""); |
|
147 |
if (importCommentProperties) properties.put("#"+propertiesPrefix+"newparid", "");
|
|
142 | 148 |
} |
143 | 149 |
|
144 | 150 |
if (wId == null) { |
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ImportCoNLLUAnnotationsFromFile.java (revision 3752) | ||
---|---|---|
73 | 73 |
@Option(name = "normalize_word_ids", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false") |
74 | 74 |
Boolean normalize_word_ids; |
75 | 75 |
|
76 |
@Option(name = "import_conll_comment_properties", usage = "if set sentid, newdocid and parid will be imported in CQP properties", widget = "Boolean", required = true, def = "false") |
|
77 |
Boolean import_conll_comment_properties; |
|
78 |
|
|
76 | 79 |
@Option(name = "headPropertiesToProject", usage = "to create the headXYZ properties", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "deprel,upos") |
77 | 80 |
String headPropertiesToProject; |
78 | 81 |
|
... | ... | |
117 | 120 |
return null; |
118 | 121 |
} |
119 | 122 |
|
120 |
return ImportCoNLLUAnnotationsFromDirectory.importAnnotationsFromCoNLLUFile(mainCorpus, conlluFile, propertiesPrefix, textId, normalize_word_ids, |
|
123 |
return ImportCoNLLUAnnotationsFromDirectory.importAnnotationsFromCoNLLUFile(mainCorpus, conlluFile, propertiesPrefix, textId, normalize_word_ids, import_conll_comment_properties,
|
|
121 | 124 |
new HashSet<String>(Arrays.asList(headPropertiesToProject.split(","))), |
122 | 125 |
new HashSet<String>(Arrays.asList(depsPropertiesToProject.split(","))), |
123 | 126 |
new HashSet<String>(Arrays.asList(udPropertiesToImport.split(",")))); |
TXM/trunk/bundles/org.txm.conllu.rcp/src/org/txm/conllu/rcp/commands/ImportCoNLLUAnnotationsFromDirectory.java (revision 3752) | ||
---|---|---|
80 | 80 |
|
81 | 81 |
@Option(name = "normalize_word_ids", usage = "if set the CQP properties are replaced", widget = "Boolean", required = true, def = "false") |
82 | 82 |
Boolean normalize_word_ids; |
83 |
|
|
84 |
@Option(name = "import_conll_comment_properties", usage = "if set sentid, newdocid and parid will be imported in CQP properties", widget = "Boolean", required = true, def = "false") |
|
85 |
Boolean import_conll_comment_properties; |
|
83 | 86 |
|
84 | 87 |
@Option(name = "headPropertiesToProject", usage = "to create the head-XYZ properties from the word head", widget = "StringArrayMultiple", metaVar="form lemma upos xpos feats head deprel deps misc", required = true, def = "deprel,upos") |
85 | 88 |
String headPropertiesToProject; |
... | ... | |
114 | 117 |
MainCorpus mainCorpus = corpus.getMainCorpus(); |
115 | 118 |
|
116 | 119 |
try { |
117 |
HashSet<String> test = new HashSet<>(); |
|
118 |
for (String p : ImportCoNLLUAnnotations.UD_PROPERTY_NAMES) {
|
|
120 |
HashSet<String> test = new HashSet<>(); // will contain the CQP properties to update
|
|
121 |
for (String p : udPropertiesToImport.split(",")) { // test the properties to import
|
|
119 | 122 |
if (mainCorpus.getProperty(propertiesPrefix + p) != null) { |
120 | 123 |
test.add(propertiesPrefix + p); |
121 | 124 |
} |
... | ... | |
125 | 128 |
return null; |
126 | 129 |
} |
127 | 130 |
|
128 |
return importAnnotations(mainCorpus, conlluDirectory, propertiesPrefix, normalize_word_ids, |
|
131 |
return importAnnotations(mainCorpus, conlluDirectory, propertiesPrefix, normalize_word_ids, import_conll_comment_properties,
|
|
129 | 132 |
new HashSet<String>(Arrays.asList(headPropertiesToProject.split(","))), |
130 | 133 |
new HashSet<String>(Arrays.asList(depsPropertiesToProject.split(","))), |
131 | 134 |
new HashSet<String>(Arrays.asList(udPropertiesToImport.split(",")))); |
... | ... | |
151 | 154 |
* @throws IOException |
152 | 155 |
* @throws XMLStreamException |
153 | 156 |
*/ |
154 |
public static int importAnnotations(MainCorpus mainCorpus, File conlluDirectory, String propertiesPrefix, Boolean normalizeWordIds, |
|
157 |
public static int importAnnotations(MainCorpus mainCorpus, File conlluDirectory, String propertiesPrefix, Boolean normalizeWordIds, Boolean importCommentProperties,
|
|
155 | 158 |
Set<String> headPropertiesToProject, Set<String> depsPropertiesToProject, Set<String> udPropertiesToImport) throws IOException, CqiServerError, CqiClientException, XMLStreamException { |
156 | 159 |
|
157 | 160 |
Log.info(TXMCoreMessages.bind("Importing CONLL-U annotations of {0} in {1} using the ''{2}'' prefix...", conlluDirectory, mainCorpus, propertiesPrefix)); |
... | ... | |
174 | 177 |
|
175 | 178 |
for (File coonluFile : files) { |
176 | 179 |
cpb.tick(); |
177 |
nWordsInserted += ImportCoNLLUAnnotations._importAnnotations(coonluFile, mainCorpus, propertiesPrefix, null, normalizeWordIds, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport); |
|
180 |
nWordsInserted += ImportCoNLLUAnnotations._importAnnotations(coonluFile, mainCorpus, propertiesPrefix, null, normalizeWordIds, importCommentProperties, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport);
|
|
178 | 181 |
nTextProcessed++; |
179 | 182 |
} |
180 | 183 |
cpb.done(); |
... | ... | |
216 | 219 |
* @throws IOException |
217 | 220 |
* @throws XMLStreamException |
218 | 221 |
*/ |
219 |
public static int importAnnotationsFromCoNLLUFile(MainCorpus mainCorpus, File conlluFile, String propertiesPrefix, String textId, Boolean normalize_word_ids, |
|
222 |
public static int importAnnotationsFromCoNLLUFile(MainCorpus mainCorpus, File conlluFile, String propertiesPrefix, String textId, Boolean normalize_word_ids, Boolean importCommentProperties,
|
|
220 | 223 |
Set<String> headPropertiesToProject, Set<String> depsPropertiesToProject, Set<String> udPropertiesToImport) throws IOException, |
221 | 224 |
CqiServerError, CqiClientException, XMLStreamException { |
222 | 225 |
Log.info(TXMCoreMessages.bind("Importing CONLL-u annotations of {0} in {1} using the ''{2}'' prefix...", conlluFile, mainCorpus, propertiesPrefix)); |
223 | 226 |
|
224 |
int nWordsInserted = ImportCoNLLUAnnotations._importAnnotations(conlluFile, mainCorpus, propertiesPrefix, textId, normalize_word_ids, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport); |
|
227 |
int nWordsInserted = ImportCoNLLUAnnotations._importAnnotations(conlluFile, mainCorpus, propertiesPrefix, textId, normalize_word_ids, importCommentProperties, headPropertiesToProject, depsPropertiesToProject, udPropertiesToImport);
|
|
225 | 228 |
|
226 | 229 |
if (nWordsInserted == 0) { |
227 | 230 |
Log.warning("** No annotation imported. Aborting."); |
Formats disponibles : Unified diff