Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / ImportXmlTag.groovy @ 479

History | View | Annotate | Download (5.6 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $
25
// $LastChangedRevision: 3400 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer;
29

    
30
import java.text.DateFormat;
31
import java.util.Date;
32
import java.util.ArrayList;
33
import java.util.LinkedList;
34
import javax.xml.stream.*;
35
import java.net.URL;
36
import org.txm.importer.filters.*;
37
import org.txm.importer.cwb.CwbEncode
38
import org.txm.importer.cwb.CwbMakeAll
39
// TODO: Auto-generated Javadoc
40

    
41
/**
 * Dumps the element structure of an XML document into a CWB-style
 * vertical text file (".wtc").
 *
 * For every start tag one tab-separated line is written:
 * indentation + local name, running id, namespace prefix (or "none"),
 * nesting depth and number of attributes. The first child of any parent
 * opens a {@code <children n=...>} region which is closed when the parent
 * element ends, so that sibling groups can be encoded as a structural
 * attribute.
 *
 * @author mdecorde
 */
public class ImportXmlTag {

	/** The URL of the XML document to read. */
	private def url;

	/** The raw input stream opened from {@link #url}. */
	private def inputData;

	/** The StAX factory used to build the parser. */
	private def factory;

	/** The StAX parser; stays null when the constructor failed to open the source. */
	private XMLStreamReader parser;

	/** The writer on the result file, created by createOutput(). */
	private def output;

	/** The deepest nesting level seen by process(); read by main() to declare the s-attribute recursion. */
	int maxprof = 0;

	/**
	 * Opens the document and prepares a StAX parser on it.
	 *
	 * Failures are reported on standard output and leave the parser unset;
	 * process() then returns false instead of running.
	 *
	 * @param url the location of the XML document
	 */
	public ImportXmlTag(URL url) {
		this.url = url;
		try {
			inputData = url.openStream();
			factory = XMLInputFactory.newInstance();
			parser = factory.createXMLStreamReader(inputData);
		} catch (XMLStreamException ex) {
			System.out.println(ex);
			// the stream was opened but cannot be parsed: do not leak it
			closeQuietly(inputData);
			inputData = null;
		} catch (IOException ex) {
			System.out.println("IOException while parsing " + url + ": " + ex);
		}
	}

	/**
	 * Opens the result file for writing, encoded in UTF-8.
	 *
	 * @param outfile the file to create or overwrite
	 * @return true if the writer could be created, false otherwise
	 */
	private boolean createOutput(File outfile) {
		try {
			// buffered: process() emits many short lines
			output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
			return true;
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
			return false;
		}
	}

	/**
	 * Closes a resource, ignoring a null reference and any I/O error.
	 *
	 * @param resource the resource to close, may be null
	 */
	private static void closeQuietly(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException ignored) {
			// nothing useful can be done while cleaning up
		}
	}

	/**
	 * Reads the whole document and writes its tag structure to outfile.
	 *
	 * The parser can only be consumed once, so this method is meant to be
	 * called a single time per instance. All resources (writer, parser,
	 * input stream) are released before returning, whatever the outcome.
	 *
	 * @param outfile the file receiving the vertical text
	 * @return true if the document was completely processed and written, false otherwise
	 */
	public boolean process(File outfile) {
		if (parser == null) {
			// the constructor already reported why the source could not be opened
			System.out.println("ImportXmlTag: no parser available for " + url);
			return false;
		}
		if (!createOutput(outfile)) {
			closeQuietly(inputData);
			return false;
		}

		// Stack (top = last element) of the number of children seen so far for
		// each currently open element; the initial 0 stands for the document itself.
		LinkedList<Integer> writechilds = new LinkedList<Integer>();
		writechilds.add(0);
		String prof = "";   // one space per nesting level
		int wordid = 0;     // running id of the written tag lines
		int children = 0;   // running number of the <children> regions

		try {
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
				switch (event) {
					case XMLStreamConstants.START_ELEMENT:
						// the prefix is only defined on element events
						String prefix = parser.getPrefix();
						if (prefix == null || prefix.isEmpty()) {
							prefix = "none";
						}
						String localname = parser.getLocalName();

						// first child of the current parent: open its <children> region
						if (writechilds.getLast() == 0) {
							children++;
							output.write(prof + "<children n=" + children + ">\n");
						}

						// Count this element as one more child of its parent. The counter
						// is the TOP of the stack, i.e. the last element: LinkedList.pop()
						// would remove the first element instead.
						int cpt = writechilds.removeLast() + 1;
						writechilds.add(cpt);

						prof += " ";
						maxprof = Math.max(maxprof, prof.length());
						output.write(prof + localname + "\t" + (wordid++) + "\t" + prefix + "\t" + prof.length() + "\t" + parser.getAttributeCount() + "\n");

						// the new element has no children yet
						writechilds.add(0);
						break;

					case XMLStreamConstants.END_ELEMENT:
						// close the <children> region only if this element had children
						if (writechilds.getLast() > 0) {
							output.write(prof + "</children>\n");
						}
						writechilds.removeLast();
						prof = prof.substring(0, prof.length() - 1);
						break;

					default:
						// text, comments, processing instructions... are ignored
						break;
				}
			}
			// close the region opened in front of the root element
			output.write(prof + "</children>\n");
			// close here so that a failing flush is reported as an error
			output.close();
			return true;
		} catch (XMLStreamException ex) {
			System.out.println(ex);
			return false;
		} catch (IOException ex) {
			System.out.println("IOException while parsing " + url + ": " + ex);
			return false;
		} finally {
			closeQuietly(output);
			try {
				parser.close();
			} catch (XMLStreamException ignored) {
				// nothing useful can be done while cleaning up
			}
			// XMLStreamReader.close() does not close the underlying stream
			closeQuietly(inputData);
		}
	}

	/**
	 * Developer entry point: converts a sample file and indexes the result
	 * with the CWB command line tools.
	 *
	 * @param args the arguments (unused)
	 */
	public static void main(String[] args) {

		// The JVM does not expand "~": resolve the paths against the user's home directory.
		String home = System.getProperty("user.home");
		String rootDir = home + "/xml/rgaqcj/";
		new File(rootDir, "wtc").mkdirs();
		new File(rootDir, "registry").mkdirs();

		File srcfile = new File(rootDir + "src/", "roland.xml");
		File resultfile = new File(rootDir + "wtc/", "roland-xmltag.wtc");
		println("importXmlTag : " + srcfile + " to : " + resultfile);

		// File.toURL() is deprecated and does not escape special characters
		def builder = new ImportXmlTag(srcfile.toURI().toURL());
		if (!builder.process(resultfile)) {
			println("importXmlTag : failed to process " + srcfile);
			return;
		}

		def cwbLoc = home + "/Bureau/textometrie/CWB/cwb-3.0/utils/"; // path to the CWB executables

		def outDir = rootDir + "wtc/";
		def outDirTxm = rootDir;
		CwbEncode cwbEn = new CwbEncode();
		CwbMakeAll cwbMa = new CwbMakeAll();

		String[] pAttributes = ["id", "prefix", "prof", "nbattr"];
		String[] sAttributes = ["children:" + builder.maxprof + "+n"];

		// the CWB tools carry an ".exe" suffix on Windows only
		String exe = System.getProperty("os.name").contains("Windows") ? ".exe" : "";
		try {
			cwbEn.run(cwbLoc + "cwb-encode" + exe, outDirTxm + "data/" + "ROLANDXML", outDir + "roland-xmltag.wtc", outDirTxm + "registry/" + "rolandxml", pAttributes, sAttributes);
			cwbMa.run(cwbLoc + "cwb-makeall" + exe, "ROLANDXML", outDirTxm + "registry");
		} catch (Exception ex) {
			System.out.println(ex);
		}
	}
}
216