Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / importer / scripts / xmltxm / WLX2TXMTEI.groovy @ 1688

History | View | Annotate | Download (8.2 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
25
// $LastChangedRevision: 2386 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.importer.scripts.xmltxm
29

    
30
import java.util.ArrayList;
31
import javax.xml.stream.*;
32
import java.net.URL;
33
import org.txm.importer.filters.*;
34

    
35
// TODO: Auto-generated Javadoc
36
/**
 * Builds an XML-TEI-TXM file from a Weblex XML file.
 *
 * The input is read with a StAX stream reader and copied to the output
 * element by element, with the following rewrites:
 * <ul>
 * <li>every {@code w} element receives a generated {@code xml:id}; its
 * {@code p2}..{@code p13} attributes become {@code txm:ana} children and
 * its text content becomes a {@code txm:form} child;</li>
 * <li>the {@code TEI} element additionally declares the {@code txm}
 * namespace prefix;</li>
 * <li>elements listed in {@link #solotags} are written as empty
 * (self-closed) elements.</li>
 * </ul>
 *
 * An instance wraps a single input stream and can therefore run only one
 * transformation.
 *
 * @author mdecorde
 * @deprecated
 */
@Deprecated
public class WLX2TXMTEI {

    /** Namespace URI bound to the {@code txm} prefix on the TEI element. */
    private static final String TXM_NAMESPACE = "http://textometrie.ens-lyon.fr/1.0";

    /** Prefix of the generated word identifiers. */
    private static final String WORD_ID_PREFIX = "w_fro_";

    /**
     * Maps the attributes of a {@code w} element that are turned into
     * {@code txm:ana} children to the value of the {@code type} attribute
     * of the generated child.
     */
    private static final Map<String, String> ANA_TYPES = Collections.unmodifiableMap([
        p2 : "#t1",
        p3 : "#lemma",
        p4 : "#lasla",
        p5 : "#grace",
        p6 : "#t5",
        p7 : "#tt",
        p8 : "#tt",
        p9 : "#tt",
        p10: "#tt",
        p11: "#tt",
        p12: "#tt",
        p13: "#tt"
    ]);

    /** The input stream opened on {@link #url}; null once closed or if opening failed. */
    private def inputData;

    /** The StAX input factory used to create the parser. */
    private def factory;

    /** The StAX stream reader; null if it could not be created or once the transformation ran. */
    private def parser;

    /** The output directory. */
    private def dir;

    /** The output writer; null until {@code createOutput} succeeds. */
    private def output;

    /** The URL of the file to transform. */
    private def url;

    /** Local names of the elements written as empty (milestone) elements. */
    ArrayList<String> solotags;

    /**
     * Opens the input and prepares the XML parser.
     *
     * Failures to open or parse the input are reported on standard output
     * and not thrown; a later call to {@link #transfomFile} then returns
     * false.
     *
     * @param url the url to the file to transform
     * @param solotags local names of the milestone tags; null is treated
     *        as an empty list
     */
    public WLX2TXMTEI(URL url, ArrayList<String> solotags) {
        this.url = url;
        this.solotags = solotags;
        try {
            inputData = url.openStream();
            factory = XMLInputFactory.newInstance();
            parser = factory.createXMLStreamReader(inputData);
        } catch (XMLStreamException ex) {
            System.out.println("Could not create an XML parser for " + url + ": " + ex);
            // the stream was opened but is unusable: do not leak it
            closeResources();
        } catch (IOException ex) {
            System.out.println("IOException while opening " + url + ": " + ex);
        }
    }

    /**
     * Creates the output writer.
     *
     * The file is written as UTF-8: the output carries no XML declaration,
     * so XML parsers will read it back as UTF-8 whatever the platform
     * default charset is.
     *
     * @param dirPathName the output directory, created if missing
     * @param fileName the name of the output file inside the directory
     * @return true, if successful
     */
    private boolean createOutput(String dirPathName, String fileName) {
        try {
            dir = new File(dirPathName);
            dir.mkdirs();
            File f = new File(dir, fileName);
            output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8"));
            return true;
        } catch (Exception e) {
            System.out.println("Could not create output file " + fileName + " in " + dirPathName + ": " + e);
            return false;
        }
    }

    /**
     * Runs the transformation and writes the result to the given file.
     *
     * The method name keeps its historical spelling so that existing
     * callers still work. Input and output are closed when the method
     * returns after the output file was created, whether it succeeded or
     * not.
     *
     * @param dirPathName the output directory
     * @param fileName the name of the output file
     * @return true, if the whole input was transformed and written;
     *         false if the input could not be opened, the output could
     *         not be created, or an XML or I/O error occurred
     */
    public boolean transfomFile(String dirPathName, String fileName) {
        if (parser == null) {
            // the constructor failed, or the input was already consumed
            System.out.println("No XML parser available for " + url + ": nothing to transform");
            return false;
        }
        if (!createOutput(dirPathName, fileName)) {
            return false;
        }

        Collection<String> milestones = (solotags != null) ? solotags : Collections.<String>emptyList();
        boolean insideWord = false;
        StringBuilder wordForm = new StringBuilder();
        StringBuilder wordAnas = new StringBuilder();
        String lastOpened = "";
        int wordCount = 1;
        boolean success = false;

        try {
            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                if (event == XMLStreamConstants.START_ELEMENT) {
                    // NOTE(review): elements and attributes are copied by local
                    // name only, so namespace prefixes of the input are dropped.
                    String name = parser.getLocalName();
                    if ("w".equals(name)) {
                        output.write("\n<w xml:id=\"" + WORD_ID_PREFIX + wordCount + "\"");
                        wordCount++;
                        wordAnas.setLength(0);
                        for (int i = 0; i < parser.getAttributeCount(); i++) {
                            String attName = parser.getAttributeLocalName(i);
                            String attValue = parser.getAttributeValue(i);
                            String anaType = ANA_TYPES.get(attName);
                            if (anaType != null) {
                                // annotation attributes are kept aside and written
                                // as children when the word is closed
                                wordAnas.append("<txm:ana ref=\"#TT1\" type=\"").append(anaType).append("\">");
                                wordAnas.append(escapeXml(attValue, false));
                                wordAnas.append("</txm:ana>\n");
                            } else {
                                writeAttribute(attName, attValue);
                            }
                        }
                        output.write(">\n");
                        insideWord = true;
                    } else if ("TEI".equals(name)) {
                        output.write("\n<TEI");
                        writeAttributes();
                        output.write(" xmlns:txm=\"" + TXM_NAMESPACE + "\"");
                        output.write(">\n");
                    } else {
                        lastOpened = name;
                        output.write("\n<" + name);
                        writeAttributes();
                        output.write(milestones.contains(name) ? "/>" : ">");
                    }
                } else if (event == XMLStreamConstants.END_ELEMENT) {
                    String name = parser.getLocalName();
                    if ("w".equals(name)) {
                        output.write("<txm:form>" + escapeXml(wordForm.toString(), false) + "</txm:form>");
                        output.write("\n" + wordAnas);
                        output.write("</w>");
                        wordForm.setLength(0);
                        wordAnas.setLength(0);
                        insideWord = false;
                    } else if (!milestones.contains(name)) {
                        // milestones were already self-closed on their start tag;
                        // an element closed right after it was opened stays on one line
                        output.write(lastOpened.equals(name) ? ("</" + name + ">") : ("\n</" + name + ">"));
                    }
                } else if (event == XMLStreamConstants.CHARACTERS) {
                    String txt = parser.getText().trim();
                    if (insideWord) {
                        wordForm.append(txt);
                    } else {
                        output.write(escapeXml(txt, false));
                    }
                }
            }
            // flush here so that a write failure is reported, not hidden by the quiet close
            output.flush();
            success = true;
        } catch (XMLStreamException ex) {
            System.out.println("XML error while parsing " + url + ": " + ex);
        } catch (IOException ex) {
            System.out.println("IOException while parsing " + url + ": " + ex);
        } finally {
            closeResources();
        }
        return success;
    }

    /**
     * Writes all the attributes of the current start element to the output.
     */
    private void writeAttributes() {
        for (int i = 0; i < parser.getAttributeCount(); i++) {
            writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i));
        }
    }

    /**
     * Writes one attribute, preceded by a space, with its value escaped.
     *
     * @param name the attribute name
     * @param value the unescaped attribute value
     */
    private void writeAttribute(String name, String value) {
        output.write(" " + name + "=\"" + escapeXml(value, true) + "\"");
    }

    /**
     * Escapes the XML markup characters of a value returned by the parser.
     *
     * The parser returns text with entities already resolved, so the
     * characters must be escaped again before being written.
     *
     * @param text the text to escape; null is treated as empty
     * @param inAttribute true to also escape double quotes
     * @return the escaped text
     */
    private static String escapeXml(String text, boolean inAttribute) {
        if (text == null) {
            return "";
        }
        // '&' first, otherwise the entities produced below would be escaped again
        String escaped = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
        if (inAttribute) {
            escaped = escaped.replace("\"", "&quot;");
        }
        return escaped;
    }

    /**
     * Closes the output, the parser and the input, ignoring close failures,
     * and forgets them so that they are not used again.
     */
    private void closeResources() {
        try {
            if (output != null) {
                output.close();
            }
        } catch (IOException ignored) {
            // nothing more can be done with a writer that fails to close
        }
        try {
            if (parser != null) {
                parser.close();
            }
        } catch (XMLStreamException ignored) {
            // the parser is discarded anyway
        }
        try {
            if (inputData != null) {
                inputData.close();
            }
        } catch (IOException ignored) {
            // the stream is discarded anyway
        }
        output = null;
        parser = null;
        inputData = null;
    }

    /**
     * Transforms a fixed list of files found in the {@code src}
     * sub-directory of the root directory and writes the results, named
     * {@code <name>-ana.xml}, in its {@code anainline} sub-directory.
     *
     * @param args optional: the root directory as first argument;
     *        defaults to {@code xml/rgaqcj} in the user home directory
     */
    public static void main(String[] args) {

        // "~" is not expanded by the JVM: resolve the home directory explicitly
        File rootDir = (args != null && args.length > 0) ? new File(args[0]) : new File(System.getProperty("user.home"), "xml/rgaqcj");
        File srcDir = new File(rootDir, "src");
        File outDir = new File(rootDir, "anainline");
        outDir.mkdirs();

        def files = ["roland.xml", "qgraal_cm.xml", "artu.xml", "qjm.xml", "commyn1.xml", "jehpar.xml"];

        ArrayList<String> solotags = new ArrayList<String>();
        solotags.add("tagUsage");
        solotags.add("pb");
        solotags.add("lb");
        solotags.add("catRef");

        for (String file : files) {
            // roland.xml -> roland-ana.xml
            String anafile = file.substring(0, file.length() - ".xml".length()) + "-ana.xml";
            def builder = new WLX2TXMTEI(new File(srcDir, file).toURI().toURL(), solotags);
            if (!builder.transfomFile(outDir.getPath(), anafile)) {
                System.out.println("Failed to transform " + file);
            }
        }
    }
}