Révision 4019
TXM/trunk/bundles/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImporter.groovy (revision 4019) | ||
---|---|---|
32 | 32 |
|
33 | 33 |
public final String merge(String orig, def sss) { |
34 | 34 |
|
35 |
int n = 0; |
|
36 |
for (String s : sss) { |
|
37 |
if (s != "" && s != "_") { |
|
35 |
if (orig.equals("") || orig.equals("_")) { |
|
38 | 36 |
|
39 |
def ssset = new HashSet(sss); |
|
40 |
if (ssset.size() == 1) return ssset.join(".") |
|
41 |
|
|
42 |
return sss.join(".") |
|
43 |
} |
|
37 |
} else { |
|
38 |
sss.add(0, orig) |
|
44 | 39 |
} |
45 | 40 |
|
46 |
return orig; |
|
41 |
def ssset = new LinkedHashSet(sss) |
|
42 |
|
|
43 |
return ssset.join(".") |
|
47 | 44 |
} |
48 | 45 |
|
49 | 46 |
@Override |
... | ... | |
66 | 63 |
def files = conlluSrcDirectory.listFiles() |
67 | 64 |
files.sort() |
68 | 65 |
|
69 |
// Keep or not contractions |
|
70 |
String contractionsManagement = UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.CONTRACTIONS_MANAGEMENT, UDPreferences.getInstance().getString(UDPreferences.CONTRACTIONS_MANAGEMENT)); |
|
71 |
|
|
72 |
println "Contractions managment ($contractionsManagement) & add XmlId if necessary & remove empty nodes" |
|
66 |
println "Add XmlId if necessary & remove empty nodes" |
|
73 | 67 |
ConsoleProgressBar cpb_texts = new ConsoleProgressBar(files.size()) |
74 | 68 |
for (File conlluFile : files) { |
75 | 69 |
cpb_texts.tick() |
... | ... | |
80 | 74 |
ArrayList<String> lines = IOUtils.getLines(conlluFile, "UTF-8"); |
81 | 75 |
for (int i = 0 ; i < lines.size() ; i++) { |
82 | 76 |
String line = lines[i] |
83 |
|
|
77 |
|
|
84 | 78 |
if (line.length() == 0 || line.startsWith("#") || !line.contains("\t")) continue; |
85 |
|
|
79 |
|
|
86 | 80 |
def split = line.split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length); |
87 | 81 |
if (split[0].contains(".")) { |
88 | 82 |
//println "REMOVE EMPTY NODE: $split : "+ |
... | ... | |
107 | 101 |
} |
108 | 102 |
} |
109 | 103 |
|
110 |
if (contractionsManagement == UDPreferences.ALL) { |
|
111 |
// OK, nothing to do |
|
112 |
} else if (contractionsManagement == UDPreferences.SYNTAX) { |
|
113 |
if (split[0].contains("-")) { |
|
104 |
lines[i] = split.join("\t") // rebuild the line |
|
105 |
} |
|
106 |
IOUtils.write(conlluFile, lines.join("\n") + "\n") // CoNLLU needs the last line |
|
107 |
} |
|
108 |
} |
|
109 |
cpb_texts.done() |
|
114 | 110 |
|
115 |
// stores the syntactic word id and the orthographic word properties |
|
116 |
temp_multiwords = [:] |
|
117 |
int n1 = Integer.parseInt(split[0].substring(0, split[0].indexOf("-"))); |
|
118 |
int n2 = Integer.parseInt(split[0].substring(1 + split[0].indexOf("-"))); |
|
119 |
for (int ii = n1 ; ii <= n2 ; ii++) { |
|
120 |
temp_multiwords[""+ii] = split; |
|
121 |
} |
|
122 | 111 |
|
123 |
//println "REMOVE - $split" |
|
124 |
lines.remove(i) |
|
125 |
i-- |
|
126 |
continue; /// next ! |
|
127 |
} else if (temp_multiwords.containsKey(split[0])) { // it's a syntactic word of an orthographic word |
|
128 |
def split_ortho = temp_multiwords.remove(split[0]) |
|
112 |
// Keep or not contractions |
|
113 |
File conlluSrcForTXMDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu-fortxm") |
|
114 |
|
|
115 |
String contractionsManagement = UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.CONTRACTIONS_MANAGEMENT, UDPreferences.getInstance().getString(UDPreferences.CONTRACTIONS_MANAGEMENT)); |
|
129 | 116 |
|
130 |
if (split[9].length() > 0) split[9] += "|" |
|
131 |
split[9] += "multiword="+split_ortho[1] // the orthographic form |
|
132 |
} |
|
133 |
} else if (contractionsManagement == UDPreferences.SURFACE) { |
|
134 |
if (split[0].contains("-")) { |
|
135 |
int n1 = Integer.parseInt(split[0].substring(0, split[0].indexOf("-"))); |
|
136 |
int n2 = Integer.parseInt(split[0].substring(1 + split[0].indexOf("-"))); |
|
137 |
int n = n2 - n1 |
|
117 |
if (contractionsManagement == UDPreferences.ALL) { |
|
118 |
conlluSrcForTXMDirectory = conlluSrcDirectory; // use the same directory as TIGER since no word modifications have been done |
|
119 |
} else { |
|
120 |
|
|
121 |
conlluSrcForTXMDirectory.deleteDir() |
|
122 |
conlluSrcForTXMDirectory.mkdirs() |
|
123 |
|
|
124 |
println "Contractions managment mode is '$contractionsManagement'" |
|
125 |
cpb_texts = new ConsoleProgressBar(files.size()) |
|
126 |
for (File conlluFile : files) { |
|
127 |
cpb_texts.tick() |
|
138 | 128 |
|
139 |
//split[0] = ""+n1
|
|
129 |
if (conlluFile.getName().endsWith(".conllu")) {
|
|
140 | 130 |
|
141 |
// before merging and deleting words, check if they are the right ones |
|
142 |
if (lines[i+1].startsWith(""+n1+"\t") && lines[i+n+1].startsWith(""+n2+"\t")) { |
|
143 |
def splits = [] |
|
144 |
for (int j = 0 ; j <= n ;j++) { |
|
145 |
def tmp = lines[i+j+1].split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length); |
|
146 |
splits << tmp |
|
147 |
} |
|
131 |
File conlluFile2 = new File(conlluSrcForTXMDirectory, conlluFile.getName()) |
|
148 | 132 |
|
149 |
for (int j = 2 ; j < 8 ; j++) { |
|
150 |
split[j] = merge(split[j], splits.collect(){it[j]}) |
|
133 |
String textid = FileUtils.stripExtension(conlluFile) |
|
134 |
int wcounter = 1; |
|
135 |
|
|
136 |
ArrayList<String> lines = IOUtils.getLines(conlluFile, "UTF-8"); |
|
137 |
|
|
138 |
def temp_multiwords = [:] |
|
139 |
|
|
140 |
for (int i = 0 ; i < lines.size() ; i++) { |
|
141 |
String line = lines[i] |
|
142 |
if (line.length() == 0 || line.startsWith("#") || !line.contains("\t")) continue; |
|
143 |
|
|
144 |
def split = line.split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length); |
|
145 |
|
|
146 |
if (contractionsManagement == UDPreferences.SYNTAX) { |
|
147 |
if (split[0].contains("-")) { |
|
148 |
|
|
149 |
// stores the syntactic word id and the orthographic word properties |
|
150 |
temp_multiwords = [:] |
|
151 |
int n1 = Integer.parseInt(split[0].substring(0, split[0].indexOf("-"))); |
|
152 |
int n2 = Integer.parseInt(split[0].substring(1 + split[0].indexOf("-"))); |
|
153 |
for (int ii = n1 ; ii <= n2 ; ii++) { |
|
154 |
temp_multiwords[""+ii] = split; |
|
151 | 155 |
} |
152 | 156 |
|
153 |
//println "REMOVE non- $split" |
|
154 |
for (int j = 0 ; j <= n ;j++) { |
|
155 |
lines.remove(i+1) |
|
157 |
//println "REMOVE - $split" |
|
158 |
lines.remove(i) |
|
159 |
i-- |
|
160 |
continue; /// next ! |
|
161 |
} else if (temp_multiwords.containsKey(split[0])) { // it's a syntactic word of an orthographic word |
|
162 |
def split_ortho = temp_multiwords.remove(split[0]) |
|
163 |
|
|
164 |
if (split[9].length() > 0) split[9] += "|" |
|
165 |
split[9] += "multiword="+split_ortho[1] // the orthographic form |
|
166 |
} |
|
167 |
} else if (contractionsManagement == UDPreferences.SURFACE) { |
|
168 |
if (split[0].contains("-")) { |
|
169 |
int n1 = Integer.parseInt(split[0].substring(0, split[0].indexOf("-"))); |
|
170 |
int n2 = Integer.parseInt(split[0].substring(1 + split[0].indexOf("-"))); |
|
171 |
int n = n2 - n1 |
|
172 |
|
|
173 |
//split[0] = ""+n1 |
|
174 |
|
|
175 |
// before merging and deleting words, check if they are the right ones |
|
176 |
if (lines[i+1].startsWith(""+n1+"\t") && lines[i+n+1].startsWith(""+n2+"\t")) { |
|
177 |
def splits = [] |
|
178 |
for (int j = 0 ; j <= n ;j++) { |
|
179 |
def tmp = lines[i+j+1].split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length); |
|
180 |
splits << tmp |
|
181 |
} |
|
182 |
|
|
183 |
for (int j = 1 ; j < 8 ; j++) { |
|
184 |
split[j] = merge(split[j], splits.collect(){it[j]}) |
|
185 |
} |
|
186 |
|
|
187 |
//println "REMOVE non- $split" |
|
188 |
for (int j = 0 ; j <= n ;j++) { |
|
189 |
lines.remove(i+1) |
|
190 |
} |
|
156 | 191 |
} |
192 |
//println "splits=$splits" |
|
157 | 193 |
} |
158 |
//println "splits=$splits" |
|
159 | 194 |
} |
195 |
|
|
196 |
lines[i] = split.join("\t") // rebuild the line |
|
160 | 197 |
} |
161 |
|
|
162 |
lines[i] = split.join("\t") // rebuild the line |
|
198 |
IOUtils.write(conlluFile2, lines.join("\n") + "\n") // CoNLLU needs the last line |
|
163 | 199 |
} |
164 |
IOUtils.write(conlluFile, lines.join("\n") + "\n") // CoNLLU needs the last line |
|
165 | 200 |
} |
201 |
cpb_texts.done() |
|
166 | 202 |
} |
167 |
cpb_texts.done() |
|
168 |
|
|
169 |
|
|
170 |
// // Fix missing XmlId in conllu files |
|
171 |
// println "Setting word XmlID if necessary" |
|
172 |
// ConsoleProgressBar cpb_texts = new ConsoleProgressBar(files.size()) |
|
173 |
// for (File conlluFile : files) { |
|
174 |
// cpb_texts.tick() |
|
175 |
// if (conlluFile.getName().endsWith(".conllu")) { |
|
176 |
// String textid = FileUtils.stripExtension(conlluFile) |
|
177 |
// int wcounter = 1; |
|
178 |
// ArrayList<String> lines = IOUtils.getLines(conlluFile, "UTF-8"); |
|
179 |
// for (int i = 0 ; i < lines.size() ; i++) { |
|
180 |
// String line = lines[i] |
|
181 |
// if (line.length() == 0 || line.startsWith("#") || !line.contains("\t")) continue; |
|
182 |
// |
|
183 |
// def split = line.split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length); |
|
184 |
// if (split[-1] != null && !split[-1].contains("XmlId=")) { |
|
185 |
// split[-1] += "|XmlId=w_"+textid+"_"+(wcounter++) |
|
186 |
// lines[i] = split.join("\t") |
|
187 |
// } |
|
188 |
// |
|
189 |
// } |
|
190 |
// IOUtils.write(conlluFile, lines.join("\n") + "\n") |
|
191 |
// } |
|
192 |
// } |
|
193 |
// cpb_texts.done() |
|
194 |
|
|
195 | 203 |
File metadataFile = Metadatas.findMetadataFile(module.sourceDirectory) |
196 | 204 |
File srcDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu2tei") |
197 | 205 |
srcDirectory.deleteDir() |
... | ... | |
203 | 211 |
} |
204 | 212 |
|
205 | 213 |
println "Convert CoNLL-U to XML-TEI..." |
206 |
convertCoNLLU2TEI(conlluSrcDirectory, srcDirectory, project) |
|
214 |
convertCoNLLU2TEI(conlluSrcForTXMDirectory, srcDirectory, project)
|
|
207 | 215 |
|
208 | 216 |
inputDirectory = srcDirectory // switch files source directory |
209 | 217 |
|
... | ... | |
322 | 330 |
} |
323 | 331 |
|
324 | 332 |
} else { |
325 |
|
|
326 | 333 |
LinkedHashMap<String, String> wProperties = new LinkedHashMap<String, String>() |
327 | 334 |
|
328 | 335 |
def split = line.split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length) |
... | ... | |
430 | 437 |
|
431 | 438 |
for (def word : words) { |
432 | 439 |
|
433 |
//println "UD-ID="+word["id"] |
|
434 |
if (word["id"].contains("-")) { |
|
435 |
writer.writeStartElement("seg") |
|
436 |
writer.writeCharacters("******") |
|
437 |
writer.writeEndElement() // span |
|
438 |
} |
|
439 |
|
|
440 | 440 |
String id = null |
441 | 441 |
wordCounter++ |
442 | 442 |
writer.writeStartElement ("w") |
... | ... | |
458 | 458 |
writer.writeAttribute("id", "w_"+text_id+"_"+wordCounter) |
459 | 459 |
} |
460 | 460 |
|
461 |
writer.writeCharacters(word["form"]) |
|
461 |
int idx = word["form"].indexOf(".", 1); |
|
462 |
if (word["id"].contains("-") && idx > 0) { |
|
463 |
writer.writeCharacters(word["form"].substring(0, idx)) |
|
464 |
} else { |
|
465 |
writer.writeCharacters(word["form"]) |
|
466 |
} |
|
462 | 467 |
writer.writeEndElement() // w |
463 | 468 |
writer.writeCharacters(" ") |
464 | 469 |
} |
TXM/trunk/bundles/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tiger-ud.pl (revision 4019) | ||
---|---|---|
242 | 242 |
|
243 | 243 |
my $commentlines = 0; #added by AL |
244 | 244 |
|
245 |
# my $contractions = 0; #added by AL
|
|
245 |
my $contractions = 0; #added by AL |
|
246 | 246 |
# my $text_id = "unknown_text"; |
247 | 247 |
my $text_id = $infilename; |
248 | 248 |
my $sent_id = "0"; |
... | ... | |
268 | 268 |
$commentlines++; |
269 | 269 |
next; |
270 | 270 |
} |
271 |
# # Added by AL for contractions
|
|
272 |
# elsif ( $words[$w] =~ /^\d+-\d+/ ) {
|
|
273 |
# |
|
274 |
# # print LOG "Contraction line loop 1: $words[$w]\n";
|
|
275 |
# $commentlines++;
|
|
276 |
# |
|
277 |
# # $contractions++;
|
|
278 |
# next;
|
|
279 |
# }
|
|
271 |
# Added by AL for contractions |
|
272 |
elsif ( $words[$w] =~ /^\d+-\d+/ ) { |
|
273 |
|
|
274 |
# print LOG "Contraction line loop 1: $words[$w]\n"; |
|
275 |
$commentlines++; |
|
276 |
|
|
277 |
# $contractions++; |
|
278 |
next; |
|
279 |
} |
|
280 | 280 |
else { |
281 | 281 |
if ( defined($opt_c) ) { |
282 | 282 |
$words[$w] =~ s/coord(\d+)-//g; |
... | ... | |
437 | 437 |
next; |
438 | 438 |
} |
439 | 439 |
|
440 |
# #Added AL for contractions |
|
441 |
# if ( $words[$i] =~ /^\d+-\d+/ ) { |
|
442 |
# |
|
443 |
# # print LOG "Contraction loop 2 : $words[$i]\n"; |
|
444 |
# next; |
|
445 |
# } |
|
440 |
#Added AL for contractions |
|
441 |
if ( $words[$i] =~ /^\d+-\d+/ ) { |
|
446 | 442 |
|
443 |
# print LOG "Contraction loop 2 : $words[$i]\n"; |
|
444 |
next; |
|
445 |
} |
|
446 |
|
|
447 | 447 |
else { |
448 | 448 |
|
449 | 449 |
@cols = split( /\t/, $words[$i] ); |
... | ... | |
1103 | 1103 |
$print_nt_features = $nt_features; |
1104 | 1104 |
} |
1105 | 1105 |
} |
1106 |
|
|
1107 |
my $cat = $_[0]; |
|
1108 |
if ( $cat eq '' ) { |
|
1109 |
$cat = '__UNDEF__'; |
|
1110 |
} |
|
1106 | 1111 |
printf XML |
1107 | 1112 |
" <nt id=\"n%d_%d%s\" cat=\"%s\" coord=\"--\" dom=\"%s\" type=\"%s\" vform=\"%s\" vlemma=\"%s\"%s note=\"%s\" snr=\"%d\">\n", |
1108 |
$., $w, $dupl, $_[0], $dom, $type, $vform, $vlemma, $print_nt_features,
|
|
1113 |
$., $w, $dupl, $cat, $dom, $type, $vform, $vlemma, $print_nt_features,
|
|
1109 | 1114 |
notes("$._$w"), $.; |
1110 | 1115 |
printf XML " <edge idref=\"s%d_%d%s\" label=\"L\"/>\n", $., $w, $dupl; |
1111 | 1116 |
|
TXM/trunk/bundles/org.txm.conllu.core/src/org/txm/conllu/core/FixDriverFile.java (revision 4019) | ||
---|---|---|
3 | 3 |
import java.io.File; |
4 | 4 |
import java.io.IOException; |
5 | 5 |
import java.util.Arrays; |
6 |
import java.util.HashMap; |
|
7 | 6 |
import java.util.HashSet; |
7 |
import java.util.LinkedHashMap; |
|
8 |
import java.util.LinkedHashSet; |
|
8 | 9 |
import java.util.List; |
9 | 10 |
|
10 | 11 |
import javax.xml.parsers.ParserConfigurationException; |
... | ... | |
24 | 25 |
|
25 | 26 |
public static boolean fixFeatureValues(File driverFile, List<File> xmlFiles) throws ParserConfigurationException, SAXException, IOException { |
26 | 27 |
|
27 |
HashMap<String, HashSet<String>> declaredFeatures = new HashMap<String, HashSet<String>>();
|
|
28 |
HashMap<String, HashSet<String>> missingFeatures = new HashMap<String, HashSet<String>>();
|
|
29 |
HashMap<String, Element> featuresElements = new HashMap<String, Element>();
|
|
28 |
LinkedHashMap<String, LinkedHashSet<String>> declaredFeatures = new LinkedHashMap<String, LinkedHashSet<String>>();
|
|
29 |
LinkedHashMap<String, LinkedHashSet<String>> missingFeatures = new LinkedHashMap<String, LinkedHashSet<String>>();
|
|
30 |
LinkedHashMap<String, Element> featuresElements = new LinkedHashMap<String, Element>();
|
|
30 | 31 |
|
31 | 32 |
Document doc = DomUtils.load(driverFile); |
32 | 33 |
NodeList featuresList = doc.getElementsByTagName("feature"); |
... | ... | |
34 | 35 |
Element f = (Element) featuresList.item(i); |
35 | 36 |
|
36 | 37 |
featuresElements.put(f.getAttribute("name")+"\t"+f.getAttribute("domain"), f); |
37 |
HashSet<String> values = new HashSet<String>();
|
|
38 |
LinkedHashSet<String> values = new LinkedHashSet<String>();
|
|
38 | 39 |
declaredFeatures.put(f.getAttribute("name")+"\t"+f.getAttribute("domain"), values); |
39 |
missingFeatures.put(f.getAttribute("name")+"\t"+f.getAttribute("domain"), new HashSet<String>()); |
|
40 |
missingFeatures.put(f.getAttribute("name")+"\t"+f.getAttribute("domain"), new LinkedHashSet<String>());
|
|
40 | 41 |
|
41 | 42 |
NodeList featureValuesList = f.getElementsByTagName("value"); |
42 | 43 |
for (int j = 0 ; j < featureValuesList.getLength() ; j++) { |
... | ... | |
95 | 96 |
//System.out.println("\t"+missingFeatureNamedomain); |
96 | 97 |
|
97 | 98 |
Element f = featuresElements.get(missingFeatureNamedomain); |
99 |
//missingFeatures.get(missingFeatureNamedomain).add(""); |
|
98 | 100 |
|
99 | 101 |
for (String v : missingFeatures.get(missingFeatureNamedomain)) { |
100 | 102 |
//System.out.println("\t\t"+v); |
TXM/trunk/bundles/org.txm.conllu.core/src/org/txm/conllu/core/function/BratPrintTree.java (revision 4019) | ||
---|---|---|
30 | 30 |
int n2 = Integer.parseInt(split[0].substring(1 + split[0].indexOf("-"))); |
31 | 31 |
int n = n2 - n1; |
32 | 32 |
|
33 |
ArrayList<String[]> newlines = new ArrayList<>(); |
|
34 |
for (int j = 0 ; j <= n ; j++) { |
|
35 |
newlines.add(new String[split.length]); |
|
36 |
for (int p = 0 ; p < split.length ; p++) { |
|
37 |
newlines.get(j)[p] = "_"; |
|
38 |
} |
|
39 |
} |
|
33 |
//System.out.println("Word "+Arrays.toString(split)); |
|
34 |
//System.out.println("lines to insert: "+n); |
|
40 | 35 |
if ( !(splittedLines.get(i+1)[0].equals(""+n1)) || !(splittedLines.get(i+n+1)[0].equals(""+n2)) ) { |
41 |
System.out.println("FIXING "+conll.get(i)); |
|
42 |
for (int p = 2 ; p < split.length - 1 ; p++) { |
|
43 |
String[] splittedValues = split[p].split("."); |
|
36 |
|
|
37 |
ArrayList<String[]> newlines = new ArrayList<>(); |
|
38 |
for (int j = 0 ; j <= n ; j++) { |
|
39 |
newlines.add(new String[split.length]); |
|
44 | 40 |
|
41 |
newlines.get(j)[0] = ""+(n1+j); |
|
45 | 42 |
|
46 |
for (int j = 0 ; j <= n ; j++) { |
|
47 |
if (p >= splittedValues.length) { |
|
48 |
|
|
49 |
} else { |
|
50 |
newlines.get(j)[p] = splittedValues[p]; |
|
43 |
for (int p = 1 ; p < split.length ; p++) { |
|
44 |
newlines.get(j)[p] = "_"; |
|
45 |
} |
|
46 |
} |
|
47 |
|
|
48 |
//System.out.println("FIXING "+split); |
|
49 |
for (int p = 1 ; p < split.length - 1 ; p++) { |
|
50 |
String v = split[p]; |
|
51 |
String[] splittedValues = v.split("\\."); |
|
52 |
if (splittedValues.length == newlines.size()) { |
|
53 |
for (int j = 0 ; j <= n ; j++) { |
|
54 |
newlines.get(j)[p] = splittedValues[j]; |
|
51 | 55 |
} |
56 |
} else if ((splittedValues.length - 1) == newlines.size()) { |
|
57 |
for (int j = 0 ; j <= n ; j++) { |
|
58 |
newlines.get(j)[p] = splittedValues[j+1]; |
|
59 |
} |
|
60 |
} else { |
|
61 |
for (int j = 0 ; j <= n ; j++) { |
|
62 |
newlines.get(j)[p] = split[p]; |
|
63 |
} |
|
52 | 64 |
} |
53 | 65 |
} |
54 | 66 |
|
67 |
for (int j = 0 ; j <= n ; j++) { |
|
68 |
splittedLines.add(i+j+1, newlines.get(j)); |
|
69 |
|
|
70 |
} |
|
71 |
i = i + newlines.size(); |
|
72 |
|
|
55 | 73 |
} else { |
56 |
System.out.println("NOT FIXING "+conll.get(i)); |
|
74 |
//System.out.println("NOT FIXING "+conll.get(i));
|
|
57 | 75 |
} |
76 |
|
|
77 |
|
|
58 | 78 |
} |
59 |
|
|
79 |
} |
|
80 |
|
|
81 |
for (int i = 0 ; i < splittedLines.size() ; i++) { |
|
82 |
String split[] = splittedLines.get(i); |
|
60 | 83 |
conll2.add(StringUtils.join(split, "\t")); |
61 |
|
|
62 | 84 |
} |
85 |
|
|
63 | 86 |
for (String l : conll2) System.out.println(l); |
64 | 87 |
|
65 | 88 |
String bundle_id = "org.txm.conllu.core"; |
Formats disponibles : Unified diff