Révision 3573
| TXM/trunk/org.txm.conllu.core/META-INF/MANIFEST.MF (revision 3573) | ||
|---|---|---|
| 1 | 1 |
Manifest-Version: 1.0 |
| 2 |
Automatic-Module-Name: org.txm.connlu.core
|
|
| 2 |
Automatic-Module-Name: org.txm.conllu.core
|
|
| 3 | 3 |
Bundle-SymbolicName: org.txm.conllu.core;singleton:=true |
| 4 | 4 |
Export-Package: org.txm.conllu.core, |
| 5 | 5 |
org.txm.conllu.core.function, |
| TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImporter.groovy (revision 3573) | ||
|---|---|---|
| 32 | 32 |
@Override |
| 33 | 33 |
public void process() {
|
| 34 | 34 |
|
| 35 |
File connluSrcDirectory = inputDirectory
|
|
| 35 |
File conlluSrcDirectory = inputDirectory
|
|
| 36 | 36 |
|
| 37 | 37 |
boolean usenewdocid = UDPreferences.getInstance().getString(UDPreferences.IMPORT_USE_NEW_DOC_ID); // THE conllu -> Tiger XSL MUST HAVE THE SAME BEHAVIOR BEFORE // |
| 38 | 38 |
|
| 39 | 39 |
if (usenewdocid) {
|
| 40 |
connluSrcDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu")
|
|
| 41 |
connluSrcDirectory.deleteDir();
|
|
| 42 |
connluSrcDirectory.mkdirs();
|
|
| 40 |
conlluSrcDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu")
|
|
| 41 |
conlluSrcDirectory.deleteDir();
|
|
| 42 |
conlluSrcDirectory.mkdirs();
|
|
| 43 | 43 |
|
| 44 | 44 |
println "Convert CoNLL-U to XML-TEI..." |
| 45 |
if (!splitCoNLLUFiles(inputDirectory, connluSrcDirectory, project)) {
|
|
| 45 |
if (!splitCoNLLUFiles(inputDirectory, conlluSrcDirectory, project)) {
|
|
| 46 | 46 |
return; |
| 47 | 47 |
} |
| 48 | 48 |
} |
| ... | ... | |
| 51 | 51 |
srcDirectory.mkdirs(); |
| 52 | 52 |
|
| 53 | 53 |
println "Convert CoNLL-U to XML-TEI..." |
| 54 |
convertCoNLLU2TEI(connluSrcDirectory, srcDirectory, project)
|
|
| 54 |
convertCoNLLU2TEI(conlluSrcDirectory, srcDirectory, project)
|
|
| 55 | 55 |
|
| 56 | 56 |
inputDirectory = srcDirectory // switch source directory |
| 57 | 57 |
|
| TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tiger-ud.pl.tdy (revision 3573) | ||
|---|---|---|
| 47 | 47 |
# for Profiterole project (2019-2021) |
| 48 | 48 |
|
| 49 | 49 |
# 2019-09-25 |
| 50 |
# - updated default column numbers for CONNL-U SRCMF format
|
|
| 50 |
# - updated default column numbers for CONLL-U SRCMF format
|
|
| 51 | 51 |
# - added processing for comment lines |
| 52 | 52 |
# - added @textid to terminal nodes |
| 53 | 53 |
# - deleted ppos, pmor and plemma (predicted tags and lemmas) |
| TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tigerud2.groovy (revision 3573) | ||
|---|---|---|
| 48 | 48 |
// for Profiterole project (2019-2021) |
| 49 | 49 |
|
| 50 | 50 |
// 2019-09-25 |
| 51 |
// - updated default column numbers for CONNL-U SRCMF format
|
|
| 51 |
// - updated default column numbers for CoNLL-U SRCMF format
|
|
| 52 | 52 |
// - added processing for comment lines |
| 53 | 53 |
// - added @textid to terminal nodes |
| 54 | 54 |
// - deleted ppos, pmor and plemma (predicted tags and lemmas) |
| TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tiger-ud.pl (revision 3573) | ||
|---|---|---|
| 47 | 47 |
# for Profiterole project (2019-2021) |
| 48 | 48 |
|
| 49 | 49 |
# 2019-09-25 |
| 50 |
# - updated default column numbers for CONNL-U SRCMF format
|
|
| 50 |
# - updated default column numbers for CONLL-U SRCMF format
|
|
| 51 | 51 |
# - added processing for comment lines |
| 52 | 52 |
# - added @textid to terminal nodes |
| 53 | 53 |
# - deleted ppos, pmor and plemma (predicted tags and lemmas) |
| TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImport.groovy (revision 3573) | ||
|---|---|---|
| 92 | 92 |
for (String name : xmltxmFilesNames) {
|
| 93 | 93 |
name = FileUtils.stripExtension(name); |
| 94 | 94 |
|
| 95 |
File connluFile = new File(this.binaryDirectory, "tiger-xml/"+name+".xml")
|
|
| 96 |
//println " test "+connluFile
|
|
| 97 |
if (connluFile.exists()) {
|
|
| 98 |
tigerxmlFiles << connluFile
|
|
| 95 |
File conlluFile = new File(this.binaryDirectory, "tiger-xml/"+name+".xml")
|
|
| 96 |
//println " test "+conlluFile
|
|
| 97 |
if (conlluFile.exists()) {
|
|
| 98 |
tigerxmlFiles << conlluFile
|
|
| 99 | 99 |
} |
| 100 | 100 |
} |
| 101 | 101 |
|
| TXM/trunk/org.txm.conllu.core/src/org/txm/conllu/core/CallUD2TigerPerlScript.java (revision 3573) | ||
|---|---|---|
| 29 | 29 |
return true; |
| 30 | 30 |
} |
| 31 | 31 |
|
| 32 |
public boolean convertCoNLLUFiles(File[] connluFiles, String output_directory) throws IOException {
|
|
| 32 |
public boolean convertCoNLLUFiles(File[] conlluFiles, String output_directory) throws IOException {
|
|
| 33 | 33 |
|
| 34 |
if (connluFiles.length == 0) return false;
|
|
| 34 |
if (conlluFiles.length == 0) return false;
|
|
| 35 | 35 |
|
| 36 | 36 |
File tigerXMLDirectory = new File(output_directory, "tiger-xml"); |
| 37 | 37 |
File conversionFile = new File(tigerXMLDirectory, "conversion.log"); |
| ... | ... | |
| 50 | 50 |
|
| 51 | 51 |
String subcorpusList = ""; |
| 52 | 52 |
|
| 53 |
ConsoleProgressBar cpb = new ConsoleProgressBar(connluFiles.length);
|
|
| 54 |
for (File f : connluFiles) {
|
|
| 53 |
ConsoleProgressBar cpb = new ConsoleProgressBar(conlluFiles.length);
|
|
| 54 |
for (File f : conlluFiles) {
|
|
| 55 | 55 |
cpb.tick(); |
| 56 | 56 |
if (!f.getName().endsWith(".conllu")) {
|
| 57 | 57 |
continue; |
| TXM/trunk/org.txm.conllu.core/src/org/txm/conllu/core/CoNLLU2TIGER.java (revision 3573) | ||
|---|---|---|
| 65 | 65 |
mainWriter.writeStartElement("corpus");
|
| 66 | 66 |
tigerWriter.writeAttribute("id", corpusName);
|
| 67 | 67 |
|
| 68 |
for (File connluFile : conlluFiles) {
|
|
| 68 |
for (File conlluFile : conlluFiles) {
|
|
| 69 | 69 |
|
| 70 | 70 |
|
| 71 |
String filename = FileUtils.stripExtension(connluFile);
|
|
| 71 |
String filename = FileUtils.stripExtension(conlluFile);
|
|
| 72 | 72 |
File tigerXMLFile = new File(tigerDirectory, filename+".xml"); |
| 73 | 73 |
tigerOutput = new BufferedOutputStream(new FileOutputStream(tigerXMLFile), 16 * 1024); |
| 74 | 74 |
tigerWriter = outfactory.createXMLStreamWriter(tigerOutput, "UTF-8"); // create a new file |
| 75 | 75 |
|
| 76 |
CoNLLUReader reader = new CoNLLUReader(connluFile, null);
|
|
| 76 |
CoNLLUReader reader = new CoNLLUReader(conlluFile, null);
|
|
| 77 | 77 |
Sentence s = reader.readNext(); |
| 78 | 78 |
|
| 79 | 79 |
// "<subcorpus name='$infilename-$suffix' external='file:$infilename-$suffix.xml'/>\n"; |
Formats disponibles : Unified diff