Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / hyperprince / AnalyzeAlignement.groovy @ 479

History | View | Annotate | Download (7.7 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21

    
22
//
23
// This file is part of the TXM platform.
24
//
25
// The TXM platform is free software: you can redistribute it and/or modify
26
// it under the terms of the GNU General Public License as published by
27
// the Free Software Foundation, either version 3 of the License, or
28
// (at your option) any later version.
29
//
30
// The TXM platform is distributed in the hope that it will be useful,
31
// but WITHOUT ANY WARRANTY; without even the implied warranty of
32
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33
// GNU General Public License for more details.
34
//
35
// You should have received a copy of the GNU General Public License
36
// along with the TXM platform.  If not, see <http://www.gnu.org/licenses/>.
37
// 
38
// 
39
// 
40
// $LastChangedDate: 2015-12-17 12:11:39 +0100 (jeu. 17 déc. 2015) $
41
// $LastChangedRevision: 3087 $
42
// $LastChangedBy: mdecorde $ 
43
//
44
package org.txm.importer.hyperprince
45

    
46
import java.io.File;
47
import java.net.URL;
48
import java.util.ArrayList;
49
import javax.xml.stream.*;
50

    
51
// TODO: Auto-generated Javadoc
52
/**
53
 * The Class AnalyzeAlignement.
54
 */
55
class AnalyzeAlignement {
56
        
57
        /** The url. */
58
        private def url;
59
        
60
        /** The input data. */
61
        private def inputData;
62
        
63
        /** The factory. */
64
        private def factory;
65
        
66
        /** The parser. */
67
        private XMLStreamReader parser;
68
        
69
        /**
70
         * Instantiates a new analyze alignement.
71
         *
72
         * @param file the file
73
         */
74
        public AnalyzeAlignement(File file){
75
                try {
76
                        this.url = file.toURI().toURL();
77
                        inputData = url.openStream();
78
                        factory = XMLInputFactory.newInstance();
79
                        factory.setProperty(XMLInputFactory.IS_VALIDATING,false)
80
                        parser = factory.createXMLStreamReader(inputData);
81
                        
82
                } catch (XMLStreamException ex) {
83
                        System.out.println(ex);
84
                }catch (IOException ex) {
85
                        System.out.println("IOException while parsing ");
86
                }
87
        }
88
        
89
        /**
90
         * Process.
91
         *
92
         * @return the list
93
         */
94
        public List <Set <String>> process(){
95
                List <Set <String>> list = new ArrayList <Set <String>>();
96
                
97
                String localname = "";
98
                for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
99
                        
100
                        String prefix = parser.getPrefix();
101
                        if(prefix == null || prefix == "")
102
                                prefix = "";
103
                        else
104
                                prefix +=":";
105
                        
106
                        switch (event) {
107
                                case XMLStreamConstants.START_ELEMENT:
108
                                        localname = parser.getLocalName();
109
                                
110
                                // <link targets="#i1_Pr-Seg0 #f1_Pr-Seg0 #f3_Pr-Seg0 #f5_Pr-Seg0 #f4_Pr-Seg0 "/>
111
                                        if(localname == "link"){
112
                                                for(int i= 0 ; i < parser.getAttributeCount() ;i++ )
113
                                                {
114
                                                        String att = parser.getAttributeLocalName(i)
115
                                                        if(att == "targets"){
116
                                                                List<String> vals = parser.getAttributeValue(i).split(" ");
117
                                                                HashSet<String> newVals = new HashSet<String>();
118
                                                                //vals*.trim(); dans le cas du #
119
                                                                vals.each{ it->
120
                                                                        newVals << it.substring(1);
121
                                                                }
122
                                                                list.add(newVals);
123
                                                        }
124
                                                }
125
                                        }
126
                        }
127
                }
128
                parser.close()
129
                return list;
130
        }
131
        
132
        /**
133
         * Adds the alignement.
134
         *
135
         * @param file the file
136
         * @param alignIds the align ids
137
         * @return the file
138
         */
139
        public File addAlignement(File file, HashSet<String> alignIds){
140
                try {
141
                        this.url = file.toURI().toURL();
142
                        inputData = url.openStream();
143
                        factory = XMLInputFactory.newInstance();
144
                        factory.setProperty(XMLInputFactory.IS_VALIDATING,false)
145
                        parser = factory.createXMLStreamReader(inputData);
146
                        
147
                } catch (XMLStreamException ex) {
148
                        System.out.println(ex);
149
                }catch (IOException ex) {
150
                        System.out.println("IOException while parsing ");
151
                }
152
                
153
                /*
154
                 File outfile = new File(outdir,textname+language+".xml");
155
                 Writer output = new OutputStreamWriter(new FileOutputStream(outfile) , "UTF-8");
156
                 def writer = factory.createXMLStreamWriter(output, "UTF-8")*/
157
                String textname = file.getName().substring(0,file.getName().length()-4)+"_";
158
                
159
                // create XML writer
160
                File tempFile = new File(file.getParent(),textname+"temp.xml");
161
                XMLOutputFactory factory = XMLOutputFactory.newInstance();
162
                FileOutputStream output = new FileOutputStream(tempFile)
163
                def writer = factory.createXMLStreamWriter(output, "UTF-8")
164
                
165
                
166
                String localname = "";
167
                writer.writeStartDocument("UTF-8","1.0");
168
                writer.writeStartElement ("TEI");
169
                
170
                for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
171
                        
172
                        String prefix = parser.getPrefix();
173
                        if(prefix == null || prefix == "")
174
                                prefix = "";
175
                        else
176
                                prefix +=":";
177
                        
178
                        switch (event) {
179
                                case XMLStreamConstants.START_ELEMENT:
180
                                        localname = parser.getLocalName();
181
                                        if(prefix == null){
182
                                                writer.writeStartElement(prefix, localname);
183
                                        }else {
184
                                                writer.writeStartElement(localname);
185
                                        }
186
                                        for(int i= 0 ; i < parser.getAttributeCount() ;i++ )
187
                                        {
188
                                                writer.writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i))
189
                                                if("id" == parser.getAttributeLocalName(i)){
190
                                                        if((parser.getAttributeValue(i)).startsWith("w")){
191
                                                        }else {
192
                                                                String idAlign = searchForAlignId(parser.getAttributeValue(i), alignIds);
193
                                                                writer.writeAttribute("pid", idAlign);
194
                                                                if(idAlign == "-1"){
195
                                                                        System.out.println(parser.getAttributeValue(i)+" - ALIGN ID is ? "+idAlign);
196
                                                                }
197
                                                        }
198
                                                }
199
                                        }
200
                                
201
                                        break;
202
                                case XMLStreamConstants.END_ELEMENT:
203
                                        writer.writeEndElement();
204
                                
205
                                        break;
206
                                case XMLStreamConstants.CHARACTERS:
207
                                        writer.writeCharacters(parser.getText());
208
                                        break;
209
                        }
210
                }
211
                writer.writeEndElement(); // close TEI
212
                parser.close();
213
                writer.close();
214
                output.close()
215
                inputData.close();
216
                return copyfile(tempFile.getAbsolutePath(), file.getAbsolutePath());
217

    
218
        }
219
        
220
        /**
221
         * Copyfile.
222
         *
223
         * @param srFile the sr file
224
         * @param dtFile the dt file
225
         * @return the file
226
         */
227
        private static File copyfile(String srFile, String dtFile){
228
                File f2 ;
229
                try{
230
                        File f1 = new File(srFile);
231
                        f2 = new File(dtFile);
232
                        InputStream inStr = new FileInputStream(f1);
233
                        
234
                        //For Append the file.
235
                        //OutputStream out = new FileOutputStream(f2,true);
236
                        
237
                        //For Overwrite the file.
238
                        OutputStream out = new FileOutputStream(f2);
239
                        
240
                        byte[] buf = new byte[1024];
241
                        int len;
242
                        while ((len = inStr.read(buf)) > 0){
243
                                out.write(buf, 0, len);
244
                        }
245
                        inStr.close();
246
                        out.close();
247
                        System.out.println("File copied.");
248
                }
249
                catch(FileNotFoundException ex){
250
                        System.out.println(ex.getMessage() + " in the specified directory.");
251
                        return null;
252
                }
253
                catch(IOException e){
254
                        System.out.println(e.getMessage());      
255
                }
256
                return f2;
257
        }
258
        
259
        /**
260
         * Search for align id.
261
         *
262
         * @param id the id
263
         * @param alignIds the align ids
264
         * @return the string
265
         */
266
        public String searchForAlignId(String id, HashSet<String> alignIds){
267
                int i = 0;
268
                for(Set<String> set: alignIds){
269
                        i++;
270
                        if(set.contains(id)){
271
                                return ""+i;
272
                        }
273
                }
274
                return "-1";
275
        }
276
        
277
        
278
}