Révision 3573
TXM/trunk/org.txm.conllu.core/META-INF/MANIFEST.MF (revision 3573) | ||
---|---|---|
1 | 1 |
Manifest-Version: 1.0 |
2 |
Automatic-Module-Name: org.txm.connlu.core
|
|
2 |
Automatic-Module-Name: org.txm.conllu.core
|
|
3 | 3 |
Bundle-SymbolicName: org.txm.conllu.core;singleton:=true |
4 | 4 |
Export-Package: org.txm.conllu.core, |
5 | 5 |
org.txm.conllu.core.function, |
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImporter.groovy (revision 3573) | ||
---|---|---|
32 | 32 |
@Override |
33 | 33 |
public void process() { |
34 | 34 |
|
35 |
File connluSrcDirectory = inputDirectory
|
|
35 |
File conlluSrcDirectory = inputDirectory
|
|
36 | 36 |
|
37 | 37 |
boolean usenewdocid = UDPreferences.getInstance().getString(UDPreferences.IMPORT_USE_NEW_DOC_ID); // THE conllu -> Tiger XSL MUST HAVE THE SAME BEHAVIOR BEFORE // |
38 | 38 |
|
39 | 39 |
if (usenewdocid) { |
40 |
connluSrcDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu")
|
|
41 |
connluSrcDirectory.deleteDir();
|
|
42 |
connluSrcDirectory.mkdirs();
|
|
40 |
conlluSrcDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu")
|
|
41 |
conlluSrcDirectory.deleteDir();
|
|
42 |
conlluSrcDirectory.mkdirs();
|
|
43 | 43 |
|
44 | 44 |
println "Convert CoNLL-U to XML-TEI..." |
45 |
if (!splitCoNLLUFiles(inputDirectory, connluSrcDirectory, project)) {
|
|
45 |
if (!splitCoNLLUFiles(inputDirectory, conlluSrcDirectory, project)) {
|
|
46 | 46 |
return; |
47 | 47 |
} |
48 | 48 |
} |
... | ... | |
51 | 51 |
srcDirectory.mkdirs(); |
52 | 52 |
|
53 | 53 |
println "Convert CoNLL-U to XML-TEI..." |
54 |
convertCoNLLU2TEI(connluSrcDirectory, srcDirectory, project)
|
|
54 |
convertCoNLLU2TEI(conlluSrcDirectory, srcDirectory, project)
|
|
55 | 55 |
|
56 | 56 |
inputDirectory = srcDirectory // switch source directory |
57 | 57 |
|
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tiger-ud.pl.tdy (revision 3573) | ||
---|---|---|
47 | 47 |
# for Profiterole project (2019-2021) |
48 | 48 |
|
49 | 49 |
# 2019-09-25 |
50 |
# - updated default column numbers for CONNL-U SRCMF format
|
|
50 |
# - updated default column numbers for CONLL-U SRCMF format
|
|
51 | 51 |
# - added processing for comment lines |
52 | 52 |
# - added @textid to terminal nodes |
53 | 53 |
# - deleted ppos, pmor et plemma (predicted tags and lemmas) |
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tigerud2.groovy (revision 3573) | ||
---|---|---|
48 | 48 |
// for Profiterole project (2019-2021) |
49 | 49 |
|
50 | 50 |
// 2019-09-25 |
51 |
// - updated default column numbers for CONNL-U SRCMF format
|
|
51 |
// - updated default column numbers for CoNLL-U SRCMF format
|
|
52 | 52 |
// - added processing for comment lines |
53 | 53 |
// - added @textid to terminal nodes |
54 | 54 |
// - deleted ppos, pmor et plemma (predicted tags and lemmas) |
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tiger-ud.pl (revision 3573) | ||
---|---|---|
47 | 47 |
# for Profiterole project (2019-2021) |
48 | 48 |
|
49 | 49 |
# 2019-09-25 |
50 |
# - updated default column numbers for CONNL-U SRCMF format
|
|
50 |
# - updated default column numbers for CONLL-U SRCMF format
|
|
51 | 51 |
# - added processing for comment lines |
52 | 52 |
# - added @textid to terminal nodes |
53 | 53 |
# - deleted ppos, pmor et plemma (predicted tags and lemmas) |
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImport.groovy (revision 3573) | ||
---|---|---|
92 | 92 |
for (String name : xmltxmFilesNames) { |
93 | 93 |
name = FileUtils.stripExtension(name); |
94 | 94 |
|
95 |
File connluFile = new File(this.binaryDirectory, "tiger-xml/"+name+".xml")
|
|
96 |
//println " test "+connluFile
|
|
97 |
if (connluFile.exists()) {
|
|
98 |
tigerxmlFiles << connluFile
|
|
95 |
File conlluFile = new File(this.binaryDirectory, "tiger-xml/"+name+".xml")
|
|
96 |
//println " test "+conlluFile
|
|
97 |
if (conlluFile.exists()) {
|
|
98 |
tigerxmlFiles << conlluFile
|
|
99 | 99 |
} |
100 | 100 |
} |
101 | 101 |
|
TXM/trunk/org.txm.conllu.core/src/org/txm/conllu/core/CallUD2TigerPerlScript.java (revision 3573) | ||
---|---|---|
29 | 29 |
return true; |
30 | 30 |
} |
31 | 31 |
|
32 |
public boolean convertCoNLLUFiles(File[] connluFiles, String output_directory) throws IOException {
|
|
32 |
public boolean convertCoNLLUFiles(File[] conlluFiles, String output_directory) throws IOException {
|
|
33 | 33 |
|
34 |
if (connluFiles.length == 0) return false;
|
|
34 |
if (conlluFiles.length == 0) return false;
|
|
35 | 35 |
|
36 | 36 |
File tigerXMLDirectory = new File(output_directory, "tiger-xml"); |
37 | 37 |
File conversionFile = new File(tigerXMLDirectory, "conversion.log"); |
... | ... | |
50 | 50 |
|
51 | 51 |
String subcorpusList = ""; |
52 | 52 |
|
53 |
ConsoleProgressBar cpb = new ConsoleProgressBar(connluFiles.length);
|
|
54 |
for (File f : connluFiles) {
|
|
53 |
ConsoleProgressBar cpb = new ConsoleProgressBar(conlluFiles.length);
|
|
54 |
for (File f : conlluFiles) {
|
|
55 | 55 |
cpb.tick(); |
56 | 56 |
if (!f.getName().endsWith(".conllu")) { |
57 | 57 |
continue; |
TXM/trunk/org.txm.conllu.core/src/org/txm/conllu/core/CoNLLU2TIGER.java (revision 3573) | ||
---|---|---|
65 | 65 |
mainWriter.writeStartElement("corpus"); |
66 | 66 |
tigerWriter.writeAttribute("id", corpusName); |
67 | 67 |
|
68 |
for (File connluFile : conlluFiles) {
|
|
68 |
for (File conlluFile : conlluFiles) {
|
|
69 | 69 |
|
70 | 70 |
|
71 |
String filename = FileUtils.stripExtension(connluFile);
|
|
71 |
String filename = FileUtils.stripExtension(conlluFile);
|
|
72 | 72 |
File tigerXMLFile = new File(tigerDirectory, filename+".xml"); |
73 | 73 |
tigerOutput = new BufferedOutputStream(new FileOutputStream(tigerXMLFile), 16 * 1024); |
74 | 74 |
tigerWriter = outfactory.createXMLStreamWriter(tigerOutput, "UTF-8"); // create a new file |
75 | 75 |
|
76 |
CoNLLUReader reader = new CoNLLUReader(connluFile, null);
|
|
76 |
CoNLLUReader reader = new CoNLLUReader(conlluFile, null);
|
|
77 | 77 |
Sentence s = reader.readNext(); |
78 | 78 |
|
79 | 79 |
// "<subcorpus name='$infilename-$suffix' external='file:$infilename-$suffix.xml'/>\n"; |
Formats disponibles : Unified diff