Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / fleurs / compiler.groovy @ 1000

History | View | Annotate | Download (7.3 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $
25
// $LastChangedRevision: 3400 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.scripts.importer.fleurs;
29

    
30
import org.txm.importer.cwb.CwbEncode
31
import org.txm.importer.cwb.CwbMakeAll
32
import org.txm.scripts.importer.*;
33
import org.txm.scripts.*;
34
import org.txm.importer.scripts.xmltxm.BuildTTSrc;
35
import org.txm.importer.scripts.xmltxm.*;
36
import org.txm.utils.treetagger.TreeTagger;
37

    
38
import javax.xml.stream.*;
39
import java.net.URL;
40
import java.io.File;
41
import java.net.URL;
42
import java.util.HashMap;
43
import java.util.List;
44

    
45
/**
 * Converts the XML-TXM files of the "fleurs" corpus into a single CQP input
 * file and then runs the CWB tools (cwb-encode / cwb-makeall) on it.
 *
 * One instance wraps one source document: the constructor opens the URL and
 * creates a StAX reader on it, {@link #transfomFileCqp(String, String)}
 * consumes that reader and appends the result to the CQP file.
 * {@link #run(String)} drives the whole import for a corpus directory.
 */
class compiler{

	/** The input stream opened on {@link #url}; null if opening failed. */
	private def inputData;

	/** The StAX factory used to create {@link #parser}. */
	private def factory;

	/** The StAX reader on {@link #inputData}; null if initialization failed. */
	private def parser;

	/** The dir (not used by the code of this class). */
	private def dir;

	/** The writer on the CQP output file, set by createOutput. */
	private def output;

	/** The URL of the source document. */
	private def url;

	/** The text identifier written in the "id" attribute of the text tag. */
	String text="";

	/**
	 * The base name.
	 * NOTE(review): stored but not used when writing the text tag, which
	 * hard-codes base="fleurs" - confirm whether this is intended.
	 */
	String base="";

	/**
	 * The project name.
	 * NOTE(review): stored but not used when writing the text tag, which
	 * hard-codes project="default" - confirm whether this is intended.
	 */
	String project="";

	/** The language written in the "lang" attribute of the txmcorpus tag. */
	private String lang ="fr";

	/**
	 * Instantiates a new compiler without any source document.
	 * Such an instance can only be used to call {@link #run(String)}.
	 */
	public compiler(){}

	/**
	 * Instantiates a compiler for one source document and opens a StAX
	 * reader on it. Failures are reported on standard output and leave the
	 * parser unset; transfomFileCqp then returns false.
	 *
	 * @param url the url of the XML document to read
	 * @param text the text identifier
	 * @param base the base
	 * @param project the project
	 */
	public compiler(URL url,String text,String base, String project)
	{
		this.text = text
		this.base = base;
		this.project = project;
		this.url = url;

		try {
			inputData = url.openStream();

			factory = XMLInputFactory.newInstance();
			parser = factory.createXMLStreamReader(inputData);
		} catch (XMLStreamException ex) {
			System.out.println(ex);
			// the stream may already be open: do not leak it
			closeResources();
		} catch (IOException ex) {
			System.out.println("IOException while parsing " + url + ": " + ex);
			closeResources();
		}
	}

	/**
	 * set the language of the corpus.
	 *
	 * @param lang the lang
	 * @return the assigned language (implicit Groovy return of the assignment)
	 */
	public setLang(String lang)
	{
		this.lang = lang;
	}

	/**
	 * Opens the UTF-8 writer on the output file. The file is opened in
	 * append mode when it already exists, so that several source documents
	 * can be written one after the other in the same CQP file.
	 *
	 * @param dirPathName the dir path name
	 * @param fileName the file name, relative to dirPathName
	 * @return true, if successful
	 */
	private boolean createOutput(String dirPathName, String fileName){
		try {
			File f = new File(dirPathName, fileName)
			output = new OutputStreamWriter(new FileOutputStream(f, f.exists()), "UTF-8")
			return true;
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
			return false;
		}
	}

	/**
	 * Closes the writer, the parser and the input stream, ignoring the
	 * fields that were never set.
	 */
	private void closeResources() {
		// Close failures are deliberately ignored: this is only called on
		// cleanup paths where nothing more can be done with the resource.
		try { if (output != null) output.close(); } catch (IOException ignored) { }
		try { if (parser != null) parser.close(); } catch (XMLStreamException ignored) { }
		try { if (inputData != null) inputData.close(); } catch (IOException ignored) { }
	}

	/**
	 * Removes the last extension of a file name ("a.xml" gives "a").
	 * A name without extension, or starting with its only dot, is returned
	 * unchanged.
	 *
	 * @param name the file name
	 * @return the name without its extension
	 */
	private static String stripExtension(String name) {
		int dot = name.lastIndexOf(".");
		return dot > 0 ? name.substring(0, dot) : name;
	}

	/**
	 * Reads the source document and appends its CQP representation to the
	 * output file: each "w" element becomes one tab-separated line
	 * (form, id, l, lemme, pos, x, z), the "recueil" element becomes the
	 * "text" structure and every other element is copied as a tag with its
	 * attributes.
	 *
	 * @param dirPathName the dir path name
	 * @param fileName the file name, relative to dirPathName
	 * @return true if the whole document was written, false if the parser
	 *         or the output could not be set up or if an error occurred
	 */
	public boolean transfomFileCqp(String dirPathName, String fileName)
	{
		if (parser == null) {
			// the constructor already reported why the parser is missing
			System.out.println("Cannot transform " + url + ": the XML parser is not initialized.");
			return false;
		}
		if (!createOutput(dirPathName, fileName)) {
			closeResources();
			return false;
		}

		String vForm = "";
		String wordid= "";
		String w_l= "";
		String w_lemme= "";
		String w_pos= "";
		String w_x= "";
		String w_z= "";

		// true while the parser is inside a "w" element
		boolean flagForm = false;
		boolean success = false;
		try
		{
			output.write("<txmcorpus lang=\""+lang+"\">\n");
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
			{
				switch (event)
				{
					case XMLStreamConstants.START_ELEMENT:
						switch (parser.getLocalName())
						{
							case "recueil":
								output.write("<text id=\""+text+"\" base=\"fleurs\" project=\"default\">\n");
								break;
							case "w":
								flagForm = true;
								w_l = parser.getAttributeValue(null,"l");
								w_lemme = parser.getAttributeValue(null,"lemme");
								w_pos = parser.getAttributeValue(null,"pos");
								w_x = parser.getAttributeValue(null,"x");
								w_z = parser.getAttributeValue(null,"z");
								wordid = parser.getAttributeValue(null,"id");
								vForm = "";
								break;
							default:
								output.write("<"+parser.getLocalName());
								for (int i = 0; i < parser.getAttributeCount(); i++) {
									output.write(" "+parser.getAttributeLocalName(i)+"=\""+parser.getAttributeValue(i)+"\"");
								}
								output.write(">\n");
								break;
						}
						break;

					case XMLStreamConstants.END_ELEMENT:
						switch (parser.getLocalName())
						{
							case "recueil":
								output.write("</text>\n");
								break;
							case "w":
								flagForm = false;
								output.write(vForm+"\t"+wordid+"\t"+w_l+"\t"+w_lemme+"\t"+w_pos+"\t"+w_x+"\t"+w_z+"\n");
								break;
							default:
								output.write("</"+parser.getLocalName()+">\n");
								break;
						}
						break;

					case XMLStreamConstants.CHARACTERS:
						if (flagForm) {
							vForm += parser.getText().trim();
						}
						break;
				}
			}
			// The trailing newline matters: the file is opened in append mode,
			// so the next document must start its "<txmcorpus" tag on a new line.
			output.write("</txmcorpus>\n");
			// closed here so that a failing flush is reported as an error
			output.close();
			success = true;
		}
		catch (XMLStreamException ex) {
			System.out.println(ex);
		}
		catch (IOException ex) {
			System.out.println("IOException while parsing " + url + ": " + ex);
		}
		finally {
			closeResources();
		}

		return success;
	}

	/**
	 * Builds the FLEURS corpus: recreates the "cqp" directory, converts every
	 * file of the "txm" directory into "cqp/fleurs.cqp", then calls
	 * CwbEncode and CwbMakeAll.
	 *
	 * @param rootDir the root dir, containing the "txm" directory
	 * @return true if the CWB tools ran and the registry file was created,
	 *         false otherwise
	 */
	public boolean run(String rootDir)
	{
		if (!(CwbEncode.isExecutableAvailable() && CwbMakeAll.isExecutableAvailable())) {
			println ("Error: CWB executables not well set.")
			return false;
		}
		new File(rootDir,"cqp/fleurs.cqp").delete();//cleaning&preparing
		new File(rootDir,"cqp/").deleteDir();
		new File(rootDir,"cqp/").mkdir();
		new File(rootDir,"registry/").mkdir();

		//1- Transform into CQP file
		File txmDir = new File(rootDir,"txm");
		File[] files = txmDir.listFiles();
		if (files == null) {
			// listFiles() returns null when the directory is missing or unreadable
			println("Error: cannot list the files of "+txmDir)
			return false;
		}
		// listFiles() gives no ordering guarantee: sort to get a stable text order
		Arrays.sort(files);
		for (File f : files) {
			if (!f.isFile()) {
				println("file "+f+ " is not a regular file, skipped")
				continue;
			}
			println("process file "+f)
			String txtname = stripExtension(f.getName());
			// File.toURL() is deprecated and does not escape special characters
			def builder = new compiler(f.toURI().toURL(), txtname, "fleurs", "default");
			builder.setLang lang
			if (!builder.transfomFileCqp(rootDir,"cqp/fleurs.cqp")) {
				// best effort: report the file and go on with the next ones
				println("Error: failed to process file "+f)
			}
		}

		//2- Import into CWB
		CwbEncode cwbEn = new CwbEncode();
		cwbEn.setDebug true;
		CwbMakeAll cwbMa = new CwbMakeAll();
		cwbMa.setDebug true;

		String[] pAttributes = ["id","l","lemme","pos","x","z"];
		String[] sAttributes = ["txmcorpus:0+lang", "text:0+id+base+project","poeme:0+titre+genre+type+section+tranche_chrono+date+annee+titre_abr+preorig+schema1+schema2+n","strophe:0+ordre+nb_vers+structure","div:0+type+structure","vers:0+numvers+tphon+stru+n_syl+n_pied+cesure+rime"];

		try {
			String regPath = rootDir + "/registry/"+"fleurs"
			// "/data" and not "data": rootDir is not guaranteed to end with a separator
			cwbEn.run(rootDir + "/data", rootDir + "/cqp/"+"fleurs.cqp", regPath, pAttributes, sAttributes);
			if (!new File(regPath).exists()) {
				println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
				return false;
			}
			cwbMa.run("FLEURS", rootDir + "/registry");

		} catch (Exception ex) {System.out.println(ex); return false;}

		System.out.println("Done.")
		return true;
	}
}