Révision 3241
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/filters/Tokeniser/SimpleTokenizerXml.groovy (revision 3241) | ||
---|---|---|
183 | 183 |
if (event == XMLStreamConstants.START_ELEMENT ) { |
184 | 184 |
|
185 | 185 |
localname = parser.getLocalName(); |
186 |
if (wordid != null) localname = word_element_to_create; |
|
186 |
if (wordid != null) { |
|
187 |
localname = word_element_to_create; |
|
188 |
} |
|
187 | 189 |
|
188 |
if (prefix != null && prefix.length() > 0) |
|
190 |
if (prefix != null && prefix.length() > 0) {
|
|
189 | 191 |
writer.writeStartElement(prefix+":"+localname); |
190 |
else
|
|
192 |
} else {
|
|
191 | 193 |
// if(namespace != null) |
192 | 194 |
// writer.writeStartElement(namespace, localname); |
193 | 195 |
// else |
194 | 196 |
writer.writeStartElement(localname); |
195 |
|
|
197 |
} |
|
196 | 198 |
// if(parser.getNamespaceCount() > 0) |
197 | 199 |
// writer.writeDefaultNamespace(parser.getNamespaceURI(0)) |
198 | 200 |
// for(int i = 1 ; i < parser.getNamespaceCount() ; i++) |
... | ... | |
219 | 221 |
//if ("type".equals(attname)) hasType = true; |
220 | 222 |
if ("n".equals(attname)) hasN = true; |
221 | 223 |
|
222 |
if (attrprefix != null && attrprefix.length() > 0) |
|
224 |
if (attrprefix != null && attrprefix.length() > 0) {
|
|
223 | 225 |
writer.writeAttribute(attrprefix+":"+attname, parser.getAttributeValue(i)) |
224 |
else
|
|
226 |
} else {
|
|
225 | 227 |
writer.writeAttribute(attname, parser.getAttributeValue(i)) |
228 |
} |
|
226 | 229 |
} |
227 | 230 |
|
228 |
if (wordid != null && !hasId && localname == word_element_to_create) |
|
231 |
if (wordid != null && !hasId && localname == word_element_to_create) {
|
|
229 | 232 |
writer.writeAttribute("id", "w_"+filename+"_"+wordcount); |
230 |
|
|
231 |
if (!hasN && localname == word_element_to_create) |
|
233 |
} |
|
234 |
if (!hasN && localname == word_element_to_create) {
|
|
232 | 235 |
writer.writeAttribute("n", ""+wordcount); |
233 |
|
|
236 |
} |
|
234 | 237 |
if (!reg_word_tags.matcher(localname).matches()) { |
235 | 238 |
writer.writeCharacters("\n"); |
236 | 239 |
} |
... | ... | |
304 | 307 |
* |
305 | 308 |
* @return true, if successful |
306 | 309 |
*/ |
307 |
public boolean process() |
|
308 |
{ |
|
310 |
public boolean process() { |
|
309 | 311 |
if (!infile.exists()) { |
310 | 312 |
println "$infile does not exists" |
311 | 313 |
return false; |
... | ... | |
531 | 533 |
writer.writeAttribute("n",""+wordcount); |
532 | 534 |
for (String attr : retokenizedWordProperties.keySet()) { |
533 | 535 |
if ("id" == attr) { |
534 |
writer.writeAttribute("previous-id", retokenizedWordProperties[attr]);
|
|
536 |
writer.writeAttribute("old-id", retokenizedWordProperties[attr]);
|
|
535 | 537 |
} else if ("n" == attr) { |
536 |
writer.writeAttribute("previous-n", retokenizedWordProperties[attr]);
|
|
538 |
writer.writeAttribute("old-n", retokenizedWordProperties[attr]);
|
|
537 | 539 |
} else { |
538 | 540 |
writer.writeAttribute(attr, retokenizedWordProperties[attr]); |
539 | 541 |
} |
Formats disponibles : Unified diff