Bug #1696
RCP: 0.7.8, update 201602261727, XTZ import, tokenization step fails on some corpora
| Statut: | Closed | Début: | 29/02/2016 |
|---|---|---|---|
| Priorité: | Normal | Echéance: | |
| Assigné à: | - | % réalisé: | 100% |
| Catégorie: | Import | Temps passé: | - |
| Version cible: | TXM 0.7.8 | | |
Description
System: Ubuntu 14.04
TXM Version: 0.7.8, update 201602261727
Test corpus: ensldfs/Laboratoires/labo_ana_corpus/Projets/Textométrie/SpUV/BVH/bvhepistemon2016/src/bvhepistemonxtz
Console message:
Saving import parameters...
-- Split-Merge XSL Step with /home/alavrent/xml/bvhepistemonxtz/xsl/1-split-merge
-- Applying /home/alavrent/xml/bvhepistemonxtz/xsl/1-split-merge/txm-filter-teibvh-xmlw.xsl XSL to 7 files with parameters: {output-directory=file:/home/alavrent/TXM/corpora/BVHEPISTEMONXTZ/src/} on directory /home/alavrent/xml/bvhepistemonxtz
..
-- Front XSL Step with /home/alavrent/xml/bvhepistemonxtz/xsl/2-front
Nothing to do.
-- check XML files well formness.
..
-- Tokenizing 2 files
.Error : /home/alavrent/TXM/corpora/BVHEPISTEMONXTZ/src/1542_RabelaisGrgt.xml
java.lang.NullPointerException
at filters.Tokeniser.SimpleTokenizerXml.process(SimpleTokenizerXml.groovy:391)
at filters.Tokeniser.SimpleTokenizerXml$process$2.call(Unknown Source)
at org.txm.importer.xtz.XTZImporter.doTokenizeStep(XTZImporter.groovy:334)
at org.txm.importer.xtz.XTZImporter.process(XTZImporter.groovy:114)
at org.txm.importer.xtz.ImportModule.start(ImportModule.java:91)
at org.txm.importer.xtz.XTZImport.super$2$start(XTZImport.groovy)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:90)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:233)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1085)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuperN(ScriptBytecodeAdapter.java:128)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuper0(ScriptBytecodeAdapter.java:148)
at org.txm.importer.xtz.XTZImport.start(XTZImport.groovy:32)
at org.txm.importer.xtz.XTZImport$start.call(Unknown Source)
at org.codehaus.groovy.runtime.callsite.CallSiteArray.defaultCall(CallSiteArray.java:45)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:108)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:112)
at org.txm.importer.xtz.xtzLoader.run(xtzLoader.groovy:82)
at groovy.util.GroovyScriptEngine.run(GroovyScriptEngine.java:577)
at org.txm.rcpapplication.commands.ExecuteImportScript$2.run(ExecuteImportScript.java:221)
at org.eclipse.core.internal.jobs.Worker.run(Worker.java:54)
Failed to process /home/alavrent/TXM/corpora/BVHEPISTEMONXTZ/src/1542_RabelaisGrgt.xml
.Error : /home/alavrent/TXM/corpora/BVHEPISTEMONXTZ/src/1562_MontaigneArret.xml
java.lang.NullPointerException
at filters.Tokeniser.SimpleTokenizerXml.process(SimpleTokenizerXml.groovy:391)
at filters.Tokeniser.SimpleTokenizerXml$process$2.call(Unknown Source)
at org.txm.importer.xtz.XTZImporter.doTokenizeStep(XTZImporter.groovy:334)
at org.txm.importer.xtz.XTZImporter.process(XTZImporter.groovy:114)
at org.txm.importer.xtz.ImportModule.start(ImportModule.java:91)
at org.txm.importer.xtz.XTZImport.super$2$start(XTZImport.groovy)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.codehaus.groovy.reflection.CachedMethod.invoke(CachedMethod.java:90)
at groovy.lang.MetaMethod.doMethodInvoke(MetaMethod.java:233)
at groovy.lang.MetaClassImpl.invokeMethod(MetaClassImpl.java:1085)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuperN(ScriptBytecodeAdapter.java:128)
at org.codehaus.groovy.runtime.ScriptBytecodeAdapter.invokeMethodOnSuper0(ScriptBytecodeAdapter.java:148)
at org.txm.importer.xtz.XTZImport.start(XTZImport.groovy:32)
at org.txm.importer.xtz.XTZImport$start.call(Unknown Source)
at org.codehaus.groovy.runtime.callsite.CallSiteArray.defaultCall(CallSiteArray.java:45)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:108)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.call(AbstractCallSite.java:112)
at org.txm.importer.xtz.xtzLoader.run(xtzLoader.groovy:82)
at groovy.util.GroovyScriptEngine.run(GroovyScriptEngine.java:577)
at org.txm.rcpapplication.commands.ExecuteImportScript$2.run(ExecuteImportScript.java:221)
at org.eclipse.core.internal.jobs.Worker.run(Worker.java:54)
Failed to process /home/alavrent/TXM/corpora/BVHEPISTEMONXTZ/src/1562_MontaigneArret.xml
-- Posttokenize XSL Step with /home/alavrent/xml/bvhepistemonxtz/xsl/3-posttok
-- Applying /home/alavrent/xml/bvhepistemonxtz/xsl/3-posttok/txm-filter-teibvh-xmlw-posttok.xsl XSL to 0 files with parameters: {output-directory=file:/home/alavrent/TXM/corpora/BVHEPISTEMONXTZ/src/}
-- Building XML-TXM (0 files)
Error while importing corpus, reason=none
Import done:324msec (324 ms)
The import process failed.
Running SearchEngine in memory mode.
Validation test
Import the corpus with the XTZ import module, both with and without the front XSL and the post-tokenization XSL.
Test corpus: ensldfs/Laboratoires/labo_ana_corpus/Projets/Textométrie/SpUV/BVH/bvhepistemon2016/src/bvhepistemonxtz
Historique
#1 Mis à jour par Matthieu Decorde il y a plus de 9 ans
- % réalisé changé de 0 à 80
#2 Mis à jour par Matthieu Decorde il y a plus de 9 ans
- Description mis à jour (diff)
- Catégorie mis à Import
#3 Mis à jour par Matthieu Decorde il y a plus de 9 ans
- Description mis à jour (diff)
#4 Mis à jour par Sebastien Jacquot il y a presque 2 ans
- Statut changé de New à Closed
#5 Mis à jour par Sebastien Jacquot il y a presque 2 ans
- % réalisé changé de 80 à 100