67 |
67 |
files.sort()
|
68 |
68 |
|
69 |
69 |
// Whether or not to keep contractions
|
70 |
|
String keepContractions = UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.MULTIWORDS_MANAGEMENT, UDPreferences.getInstance().getString(UDPreferences.MULTIWORDS_MANAGEMENT));
|
|
70 |
String keepContractions = UDPreferences.getInstance().getProjectPreferenceValue(project, UDPreferences.CONTRACTIONS_MANAGEMENT, UDPreferences.getInstance().getString(UDPreferences.CONTRACTIONS_MANAGEMENT));
|
71 |
71 |
|
72 |
|
println "Multiwords managment ($keepContractions) & add XmlId if necessary & remove empty nodes"
|
|
72 |
println "Contractions managment ($keepContractions) & add XmlId if necessary & remove empty nodes"
|
73 |
73 |
ConsoleProgressBar cpb_texts = new ConsoleProgressBar(files.size())
|
74 |
74 |
for (File conlluFile : files) {
|
75 |
75 |
cpb_texts.tick()
|
... | ... | |
100 |
100 |
|
101 |
101 |
if (keepContractions == UDPreferences.ALL) {
|
102 |
102 |
// OK, nothing to do
|
103 |
|
} else if (keepContractions == UDPreferences.TOKENS) {
|
|
103 |
} else if (keepContractions == UDPreferences.SYNTAX) {
|
104 |
104 |
if (split[0].contains("-")) {
|
105 |
105 |
//println "REMOVE - $split"
|
106 |
106 |
lines.remove(i)
|
107 |
107 |
i--
|
108 |
108 |
continue; /// next !
|
109 |
109 |
}
|
110 |
|
} else if (keepContractions == UDPreferences.MULTIWORDS) {
|
|
110 |
} else if (keepContractions == UDPreferences.SURFACE) {
|
111 |
111 |
if (split[0].contains("-")) {
|
112 |
112 |
int n1 = Integer.parseInt(split[0].substring(0, split[0].indexOf("-")));
|
113 |
|
split[0] = ""+n1
|
114 |
|
|
115 |
|
|
116 |
113 |
int n2 = Integer.parseInt(split[0].substring(1 + split[0].indexOf("-")));
|
117 |
|
int n = 1 + n2 - n1
|
|
114 |
int n = n2 - n1
|
118 |
115 |
|
|
116 |
split[0] = ""+n1
|
|
117 |
|
119 |
118 |
def splits = []
|
120 |
119 |
for (int j = 1 ; j <= n ;j++) {
|
121 |
120 |
def tmp = lines[i+j].split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length);
|
... | ... | |
135 |
134 |
if (split[9].length() > 0) split[9] += "|"
|
136 |
135 |
split[9] += "expand="+splits.collect(){it[1]}.join("_")
|
137 |
136 |
|
138 |
|
//println "REMOVE non- $split"
|
139 |
|
for (int j = 1 ; j <= n ;j++) {
|
140 |
|
splits << lines[i+j].split("\t", ImportCoNLLUAnnotations.UD_PROPERTY_NAMES.length);
|
|
137 |
println "REMOVE non- $split"
|
|
138 |
for (int j = 0 ; j <= n ;j++) {
|
141 |
139 |
lines.remove(i+1)
|
142 |
140 |
}
|
|
141 |
println "splits=$splits"
|
143 |
142 |
}
|
144 |
143 |
}
|
145 |
144 |
|