Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / tal / melt / MEltAnnotate.groovy @ 479

History | View | Annotate | Download (4.8 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2013-11-08 13:38:06 +0100 (Fri, 08 Nov 2013) $
25
// $LastChangedRevision: 2569 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.tal.melt
29

    
30
import java.io.BufferedReader;
31
import java.io.File
32
import java.io.IOException;
33
import java.io.InputStream;
34
import java.io.InputStreamReader;
35
import java.text.DateFormat
36
import java.util.ArrayList;
37
import java.util.Date
38
import java.util.Locale;
39

    
40
import org.txm.Toolbox
41
import org.txm.importer.*
42
import org.txm.importer.cwb.*
43
import org.txm.objects.*
44
import org.txm.scripts.teitxm.Annotate;
45
import org.txm.tal.SlashFile2TTRez;
46
import org.txm.utils.LangDetector;
47
import org.txm.utils.treetagger.TreeTagger
48

    
49
// TODO: Auto-generated Javadoc
50
/**
 * Annotate variant that tags with the MElt tagger instead of TreeTagger.
 *
 * The class keeps the TreeTagger-oriented entry points of {@link Annotate}
 * ({@code applyTT}, {@code run}) and redirects the tagging step to
 * {@link #applyMElt(File, File)}.
 * NOTE(review): how the parent class drives these methods is not visible in
 * this file - confirm against Annotate.
 */
class MEltAnnotate extends Annotate {

    /**
     * Fills the report/responsibility/application fields describing this
     * annotation run.
     *
     * @param rootDirFile directory in which the "NLPToolsParameters.xml" report file is located
     * @param modelfile not used by this implementation
     */
    public void initTTOutfileInfos(File rootDirFile, File modelfile)
    {
        reportFile = new File(rootDirFile,"NLPToolsParameters.xml");

        // One timestamp for both renderings, so they cannot straddle midnight.
        Date now = new Date();
        respPerson = System.getProperty("user.name");
        respId = "txm";
        respDesc = "NLP annotation tool";
        respDate = DateFormat.getDateInstance(DateFormat.SHORT, Locale.UK).format(now);
        respWhen = DateFormat.getDateInstance(DateFormat.FULL, Locale.UK).format(now);

        appIdent = "MElt";
        appVersion = "2.0";

        distributor = "";
        publiStmt = """""";
        sourceStmt = """""";

        types = ["frpos"];
        typesTITLE = ["frpos"];

        typesDesc = ["pos tagset built from MElt"]
        // NOTE(review): two entries here although only one type is declared
        // above - left as found; confirm how Annotate indexes these lists.
        typesTAGSET = ["",""]
        typesWEB = ["",""]

        idform ="w";
    }

    /**
     * Tagging entry point named after TreeTagger; delegates to
     * {@link #applyMElt(File, File)}.
     *
     * @param ttsrcfile the file to tag
     * @param ttoutfile the file receiving the tagger output
     * @param modelfile ignored, MElt is called without a model argument
     * @return true, if successful
     */
    public boolean applyTT(File ttsrcfile, File ttoutfile, File modelfile) {
        return applyMElt(ttsrcfile, ttoutfile)
    }

    /**
     * Runs the "MElt" command, sends it the content of ttsrcfile on its
     * standard input, stores its standard output in ttoutfile, then passes
     * ttoutfile to SlashFile2TTRez.
     *
     * Both files are handled as UTF-8.
     * NOTE(review): the pipes to and from MElt are also encoded/decoded as
     * UTF-8 - confirm that this matches the MElt installation.
     *
     * @param ttsrcfile the file to tag, read line by line
     * @param ttoutfile the file receiving the MElt output
     * @return true if MElt ran and exited with code 0; false if it could not
     *         be started, was interrupted, or exited abnormally
     */
    public boolean applyMElt(File ttsrcfile, File ttoutfile) {
        // Call MElt
        List<String> args = ["MElt"];
        ProcessBuilder pb = new ProcessBuilder(args);
        Process process = null;
        try {
            process = pb.start();
        } catch (IOException startError) {
            // Without a process there is nothing to read from: report and give up
            // instead of failing later with a NullPointerException.
            System.err.println(startError);
            return false;
        }

        // Drain stderr in the background so that MElt cannot block on a full pipe.
        process.consumeProcessErrorStream((OutputStream) System.err);

        // Feed stdin from its own thread: writing everything before reading
        // anything can deadlock once the output pipe of MElt is full.
        Thread feeder = Thread.start {
            try {
                // An explicit writer is required: calling println on a plain
                // OutputStream prints to the console, not to the stream.
                BufferedWriter meltIn = new BufferedWriter(new OutputStreamWriter(process.getOutputStream(), "UTF-8"));
                try {
                    ttsrcfile.eachLine ("UTF-8") { srcLine ->
                        println "> $srcLine"
                        meltIn.write(srcLine);
                        meltIn.newLine();
                    }
                } finally {
                    // Closing stdin signals end of input to MElt.
                    meltIn.close();
                }
            } catch (IOException feedError) {
                System.err.println(feedError);
            }
        }

        BufferedReader meltOut = new BufferedReader(new InputStreamReader(process.getInputStream(), "UTF-8"));
        try {
            ttoutfile.withWriter("UTF-8") { writer ->
                String line;
                println "getting result"
                while ((line = meltOut.readLine()) != null) {
                    println "< $line"
                    writer.println(line);
                }
                println "end of result"
            }
        } finally {
            meltOut.close();
        }

        int exitCode = 0;
        try {
            feeder.join();
            exitCode = process.waitFor();
        } catch (InterruptedException interrupted) {
            // Keep the interrupt flag for the caller and stop the tagger.
            Thread.currentThread().interrupt();
            System.err.println(interrupted);
            process.destroy();
            return false;
        }

        if (exitCode != 0) {
            System.err .println("Process exited abnormally with code " + exitCode + " at " + DateFormat.getDateInstance(DateFormat.FULL, Locale.UK).format(new Date())); //$NON-NLS-1$ //$NON-NLS-2$

            for (int c = 0; c < args.size(); c++) {
                System.out.print("" + args.get(c) + " "); //$NON-NLS-1$ //$NON-NLS-2$
            }
            System.out.println();
            // The output of a failed run is not reformatted.
            return false;
        }

        // Reformat MElt output
        // NOTE(review): presumably SlashFile2TTRez rewrites ttoutfile in the
        // TreeTagger result format using the .tmp sibling - confirm.
        File tmpFile = new File(ttoutfile.getParentFile(),ttoutfile.getName()+".tmp")
        new SlashFile2TTRez(ttoutfile, tmpFile, "UTF-8")
        return true;
    }

    /**
     * Calls the three-argument {@code run} with the model name "fr.par".
     *
     * @param binDir first argument forwarded to {@code run}
     * @param txmDir second argument forwarded to {@code run}
     * @return the value returned by the three-argument {@code run}
     */
    public boolean run(File binDir, File txmDir) {
        return run(binDir, txmDir, "fr.par")
    }

    /**
     * Manual test entry point: sets Toolbox parameters to hard-coded
     * developer paths, then runs the annotation on hard-coded directories.
     *
     * @param args ignored
     */
    public static void main(def args) {
        Toolbox.setParam(Toolbox.INSTALL_DIR,new File("C:/Program Files/TXM"));
        Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH, new File("/home/mdecorde/treetagger"));
        Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH, new File("/home/mdecorde/treetagger/models"));
        Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8");
        Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ",");
        Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\"");
        Toolbox.setParam(Toolbox.USER_TXM_HOME, new File("/home/mdecorde/TXM"));

        println "-- ANNOTATE - Running MElt tool"

        File binDir = new File("/home/mdecorde/xml/melt-out")
        File txmDir = new File("/home/mdecorde/xml/melt-in")
        new MEltAnnotate().run(binDir, txmDir);
    }
}