Révision 3573

TXM/trunk/org.txm.conllu.core/META-INF/MANIFEST.MF (revision 3573)
1 1
Manifest-Version: 1.0
2
Automatic-Module-Name: org.txm.connlu.core
2
Automatic-Module-Name: org.txm.conllu.core
3 3
Bundle-SymbolicName: org.txm.conllu.core;singleton:=true
4 4
Export-Package: org.txm.conllu.core,
5 5
 org.txm.conllu.core.function,
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImporter.groovy (revision 3573)
32 32
	@Override
33 33
	public void process() {
34 34
		
35
		File connluSrcDirectory = inputDirectory
35
		File conlluSrcDirectory = inputDirectory
36 36
		
37 37
		boolean usenewdocid =  UDPreferences.getInstance().getString(UDPreferences.IMPORT_USE_NEW_DOC_ID); // THE conllu -> Tiger XSL MUST HAVE THE SAME BEHAVIOR BEFORE //
38 38
		
39 39
		if (usenewdocid) {
40
			connluSrcDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu")
41
			connluSrcDirectory.deleteDir();
42
			connluSrcDirectory.mkdirs();
40
			conlluSrcDirectory = new File(outputDirectory.getParentFile().getParentFile(), "conllu")
41
			conlluSrcDirectory.deleteDir();
42
			conlluSrcDirectory.mkdirs();
43 43
			
44 44
			println "Convert CoNLL-U to XML-TEI..."
45
			if (!splitCoNLLUFiles(inputDirectory, connluSrcDirectory, project)) {
45
			if (!splitCoNLLUFiles(inputDirectory, conlluSrcDirectory, project)) {
46 46
				return;
47 47
			}
48 48
		}
......
51 51
		srcDirectory.mkdirs();
52 52
		
53 53
		println "Convert CoNLL-U to XML-TEI..."
54
		convertCoNLLU2TEI(connluSrcDirectory, srcDirectory, project)
54
		convertCoNLLU2TEI(conlluSrcDirectory, srcDirectory, project)
55 55
		
56 56
		inputDirectory = srcDirectory // switch source directory
57 57
		
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tiger-ud.pl.tdy (revision 3573)
47 47
# for Profiterole project (2019-2021)
48 48

  
49 49
# 2019-09-25
50
# - updated default column numbers for CONNL-U SRCMF format
50
# - updated default column numbers for CoNLL-U SRCMF format
51 51
# - added processing for comment lines
52 52
# - added @textid to terminal nodes
53 53
# - deleted ppos, pmor et plemma (predicted tags and lemmas)
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tigerud2.groovy (revision 3573)
48 48
// for Profiterole project (2019-2021)
49 49

  
50 50
// 2019-09-25
51
// - updated default column numbers for CONNL-U SRCMF format
51
// - updated default column numbers for CoNLL-U SRCMF format
52 52
// - added processing for comment lines
53 53
// - added @textid to terminal nodes
54 54
// - deleted ppos, pmor et plemma (predicted tags and lemmas)
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/conll2tiger-ud.pl (revision 3573)
47 47
# for Profiterole project (2019-2021)
48 48

  
49 49
# 2019-09-25
50
# - updated default column numbers for CONNL-U SRCMF format
50
# - updated default column numbers for CoNLL-U SRCMF format
51 51
# - added processing for comment lines
52 52
# - added @textid to terminal nodes
53 53
# - deleted ppos, pmor et plemma (predicted tags and lemmas)
TXM/trunk/org.txm.conllu.core/groovy/org/txm/scripts/importer/conllu/CoNLLUImport.groovy (revision 3573)
92 92
			for (String name : xmltxmFilesNames) {
93 93
				name = FileUtils.stripExtension(name);
94 94
				
95
				File connluFile = new File(this.binaryDirectory, "tiger-xml/"+name+".xml")
96
				//println " test "+connluFile
97
				if (connluFile.exists()) {
98
					tigerxmlFiles << connluFile
95
				File conlluFile = new File(this.binaryDirectory, "tiger-xml/"+name+".xml")
96
				//println " test "+conlluFile
97
				if (conlluFile.exists()) {
98
					tigerxmlFiles << conlluFile
99 99
				}
100 100
			}
101 101
			
TXM/trunk/org.txm.conllu.core/src/org/txm/conllu/core/CallUD2TigerPerlScript.java (revision 3573)
29 29
		return true;
30 30
	}
31 31
	
32
	public boolean convertCoNLLUFiles(File[] connluFiles, String output_directory) throws IOException {
32
	public boolean convertCoNLLUFiles(File[] conlluFiles, String output_directory) throws IOException {
33 33
		
34
		if (connluFiles.length == 0) return false;
34
		if (conlluFiles.length == 0) return false;
35 35
		
36 36
		File tigerXMLDirectory = new File(output_directory, "tiger-xml");
37 37
		File conversionFile = new File(tigerXMLDirectory, "conversion.log");
......
50 50
		
51 51
		String subcorpusList = "";
52 52

  
53
		ConsoleProgressBar cpb = new ConsoleProgressBar(connluFiles.length);
54
		for (File f : connluFiles) {
53
		ConsoleProgressBar cpb = new ConsoleProgressBar(conlluFiles.length);
54
		for (File f : conlluFiles) {
55 55
			cpb.tick();
56 56
			if (!f.getName().endsWith(".conllu")) {
57 57
				continue;
TXM/trunk/org.txm.conllu.core/src/org/txm/conllu/core/CoNLLU2TIGER.java (revision 3573)
65 65
		mainWriter.writeStartElement("corpus");
66 66
		tigerWriter.writeAttribute("id", corpusName);
67 67
		
68
		for (File connluFile : conlluFiles) {
68
		for (File conlluFile : conlluFiles) {
69 69
			
70 70
			
71
			String filename = FileUtils.stripExtension(connluFile);
71
			String filename = FileUtils.stripExtension(conlluFile);
72 72
			File tigerXMLFile = new File(tigerDirectory, filename+".xml");
73 73
			tigerOutput = new BufferedOutputStream(new FileOutputStream(tigerXMLFile), 16 * 1024);
74 74
			tigerWriter = outfactory.createXMLStreamWriter(tigerOutput, "UTF-8"); // create a new file
75 75
			
76
			CoNLLUReader reader = new CoNLLUReader(connluFile, null);
76
			CoNLLUReader reader = new CoNLLUReader(conlluFile, null);
77 77
			Sentence s = reader.readNext();
78 78
			
79 79
			// "<subcorpus name='$infilename-$suffix' external='file:$infilename-$suffix.xml'/>\n";

Formats disponibles : Unified diff