Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / hyperprince / AnalyzeAlignement.groovy @ 187

History | View | Annotate | Download (7.8 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21

    
22
//
23
// This file is part of the TXM platform.
24
//
25
// The TXM platform is free software: you can redistribute it and/or modify
26
// it under the terms of the GNU General Public License as published by
27
// the Free Software Foundation, either version 3 of the License, or
28
// (at your option) any later version.
29
//
30
// The TXM platform is distributed in the hope that it will be useful,
31
// but WITHOUT ANY WARRANTY; without even the implied warranty of
32
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33
// GNU General Public License for more details.
34
//
35
// You should have received a copy of the GNU General Public License
36
// along with the TXM platform.  If not, see <http://www.gnu.org/licenses/>.
37
// 
38
// 
39
// 
40
// $LastChangedDate: 2015-12-17 12:11:39 +0100 (Thu, 17 Dec 2015) $
41
// $LastChangedRevision: 3087 $
42
// $LastChangedBy: mdecorde $ 
43
//
44
package org.txm.importer.hyperprince
45

    
46
import java.io.File;
47
import java.net.URL;
48
import java.util.ArrayList;
49
import javax.xml.stream.*;
50

    
51
import org.txm.functions.concordances.comparators.NullComparator;
52

    
53

    
54
// TODO: Auto-generated Javadoc
55
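/**
 * Reads the {@code targets} attribute of the {@code <link>} elements of an XML
 * file into groups of identifiers, and rewrites an XML file so that elements
 * carrying an {@code id} attribute also carry a {@code pid} attribute naming
 * the group their id belongs to.
 */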
58
class AnalyzeAlignement {

        /** URL of the file most recently handed to {@link #openParser(File)}. */
        private def url;

        /** Byte stream opened on {@link #url}; it feeds {@link #parser}. */
        private def inputData;

        /** StAX input factory that created {@link #parser}. */
        private def factory;

        /** Pull parser over {@link #inputData}, or null when the file could not be opened. */
        private XMLStreamReader parser;

        /**
         * Opens a non-validating StAX parser on the given file.
         *
         * A failure to open or parse the file is reported on standard output and
         * does not throw; {@link #process()} then returns an empty list.
         *
         * @param file the XML file whose {@code <link>} elements will be read
         */
        public AnalyzeAlignement(File file){
                openParser(file);
        }

        /**
         * Collects the identifier groups declared by the {@code <link>} elements.
         *
         * For every {@code <link targets="#a #b ..."/>} element one set is added to the
         * result, in document order. The {@code targets} value is split on spaces and
         * the first character of each token (expected to be {@code #}) is dropped.
         * The parser and its underlying stream are closed when the method returns,
         * whether normally or through an exception.
         *
         * @return one set of identifiers per {@code targets} attribute found; empty if
         *         the file could not be opened by the constructor
         */
        public List <Set <String>> process(){
                List <Set <String>> list = new ArrayList <Set <String>>();
                if (parser == null) {
                        // The constructor already reported why the file could not be opened.
                        return list;
                }

                try {
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                if (event != XMLStreamConstants.START_ELEMENT || parser.getLocalName() != "link") {
                                        continue;
                                }
                                // <link targets="#i1_Pr-Seg0 #f1_Pr-Seg0 #f3_Pr-Seg0 #f5_Pr-Seg0 #f4_Pr-Seg0 "/>
                                for (int i = 0; i < parser.getAttributeCount(); i++) {
                                        if (parser.getAttributeLocalName(i) != "targets") {
                                                continue;
                                        }
                                        Set<String> ids = new HashSet<String>();
                                        // tokenize() skips the empty tokens that doubled, leading or
                                        // trailing spaces would produce, so substring(1) cannot fail.
                                        for (String target : parser.getAttributeValue(i).tokenize(" ")) {
                                                ids << target.substring(1); // drop the leading '#'
                                        }
                                        list.add(ids);
                                }
                        }
                } finally {
                        closeInput();
                }
                return list;
        }

        /**
         * Rewrites {@code file} in place, adding a {@code pid} attribute to its elements.
         *
         * The document is copied element by element into a sibling file named
         * {@code <basename>_temp.xml}, wrapped in an extra {@code <TEI>} root element,
         * and the temporary file is then copied over {@code file} (the temporary file
         * is left on disk). Only start tags, end tags and character data are copied;
         * element and attribute names are written without their namespace prefix.
         * Each element that has an {@code id} attribute whose value does not start
         * with {@code w} receives {@code pid="<n>"}, where {@code n} is the value
         * returned by {@link #searchForAlignId(String, HashSet)}; any {@code pid}
         * attribute already present on such an element is replaced rather than
         * duplicated, so the method can be run again on its own output.
         *
         * @param file the XML file to rewrite
         * @param alignIds the identifier groups, as consumed by
         *        {@link #searchForAlignId(String, HashSet)}
         * @return the rewritten file, or null if {@code file} could not be opened or
         *         the temporary file could not be copied back
         * @throws XMLStreamException if the document cannot be parsed or written
         */
        public File addAlignement(File file, HashSet<String> alignIds){
                if (!openParser(file)) {
                        // Nothing was read: leave the target file untouched.
                        return null;
                }

                // Derive "<basename>_" by cutting at the last dot instead of assuming
                // a three-letter extension.
                String name = file.getName();
                int dot = name.lastIndexOf(".");
                String textname = (dot > 0 ? name.substring(0, dot) : name) + "_";

                // The absolute form always has a parent directory for a regular file.
                File tempFile = new File(file.getAbsoluteFile().getParentFile(), textname + "temp.xml");
                FileOutputStream output = null;
                def writer = null;
                try {
                        output = new FileOutputStream(tempFile);
                        writer = XMLOutputFactory.newInstance().createXMLStreamWriter(output, "UTF-8");
                        writer.writeStartDocument("UTF-8", "1.0");
                        writer.writeStartElement("TEI");

                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                switch (event) {
                                        case XMLStreamConstants.START_ELEMENT:
                                                writer.writeStartElement(parser.getLocalName());
                                                copyAttributes(writer, alignIds);
                                                break;
                                        case XMLStreamConstants.END_ELEMENT:
                                                writer.writeEndElement();
                                                break;
                                        case XMLStreamConstants.CHARACTERS:
                                        case XMLStreamConstants.CDATA:
                                                // Some parsers report CDATA sections as a separate event;
                                                // their text must not be lost.
                                                writer.writeCharacters(parser.getText());
                                                break;
                                }
                        }
                        writer.writeEndElement(); // close TEI
                        writer.flush();
                } finally {
                        try {
                                if (writer != null) writer.close();
                        } finally {
                                // XMLStreamWriter.close() does not close the underlying stream.
                                closeQuietly(output);
                                closeInput();
                        }
                }
                return copyfile(tempFile.getAbsolutePath(), file.getAbsolutePath());
        }

        /**
         * Copies the attributes of the current start tag to {@code writer}, adding the
         * {@code pid} attribute right after an {@code id} attribute whose value does not
         * start with {@code w}.
         */
        private void copyAttributes(def writer, HashSet<String> alignIds){
                int count = parser.getAttributeCount();

                // First pass: find out whether a pid is going to be written, so that a
                // pid left by a previous run can be skipped instead of duplicated.
                boolean writesPid = false;
                for (int i = 0; i < count; i++) {
                        if ("id" == parser.getAttributeLocalName(i) && !parser.getAttributeValue(i).startsWith("w")) {
                                writesPid = true;
                        }
                }

                for (int i = 0; i < count; i++) {
                        String attName = parser.getAttributeLocalName(i);
                        String attValue = parser.getAttributeValue(i);
                        if (writesPid && "pid" == attName) {
                                continue;
                        }
                        writer.writeAttribute(attName, attValue);
                        if ("id" == attName && !attValue.startsWith("w")) {
                                String idAlign = searchForAlignId(attValue, alignIds);
                                writer.writeAttribute("pid", idAlign);
                                if (idAlign == "-1") {
                                        System.out.println(attValue+" - ALIGN ID is ? "+idAlign);
                                }
                        }
                }
        }

        /**
         * Points {@link #parser} at {@code file}, releasing any input opened before.
         *
         * @return true if the parser is ready; false if opening failed, in which case
         *         the failure has been printed and {@link #parser} is null
         */
        private boolean openParser(File file){
                try {
                        closeInput();
                } catch (Exception ignored) {
                        // The previous input is being discarded; a failure to close it
                        // must not prevent opening the new one.
                }
                parser = null;
                inputData = null;
                try {
                        this.url = file.toURI().toURL();
                        inputData = url.openStream();
                        factory = XMLInputFactory.newInstance();
                        factory.setProperty(XMLInputFactory.IS_VALIDATING, false);
                        parser = factory.createXMLStreamReader(inputData);
                        return true;
                } catch (XMLStreamException ex) {
                        System.out.println(ex);
                } catch (IOException ex) {
                        System.out.println("IOException while parsing " + file + ": " + ex);
                }
                // Do not keep a half-opened input around.
                closeQuietly(inputData);
                inputData = null;
                parser = null;
                return false;
        }

        /** Closes the parser and then the stream it reads from; both may be null. */
        private void closeInput(){
                try {
                        if (parser != null) parser.close();
                } finally {
                        if (inputData != null) inputData.close();
                }
        }

        /** Closes {@code stream} if it is not null, ignoring any IOException. */
        private static void closeQuietly(Closeable stream){
                try {
                        if (stream != null) stream.close();
                } catch (IOException ignored) {
                        // Best-effort cleanup: there is nothing useful to do here.
                }
        }

        /**
         * Copies the content of one file over another, replacing the destination.
         *
         * Both streams are closed even when the copy fails. Errors are printed on
         * standard output rather than thrown.
         *
         * @param srFile path of the file to read
         * @param dtFile path of the file to overwrite
         * @return the destination file, or null if either file could not be opened
         *         or the copy failed
         */
        private static File copyfile(String srFile, String dtFile){
                File f2 = new File(dtFile);
                InputStream inStr = null;
                OutputStream out = null;
                try{
                        inStr = new FileInputStream(new File(srFile));
                        // Opened without the append flag: the destination is overwritten.
                        out = new FileOutputStream(f2);

                        byte[] buf = new byte[1024];
                        int len;
                        while ((len = inStr.read(buf)) > 0){
                                out.write(buf, 0, len);
                        }
                        // Close here so that a failure while closing is reported as a
                        // failed copy; the finally block only cleans up after errors.
                        inStr.close();
                        out.close();
                        System.out.println("File copied.");
                }
                catch(FileNotFoundException ex){
                        System.out.println(ex.getMessage() + " in the specified directory.");
                        return null;
                }
                catch(IOException e){
                        System.out.println(e.getMessage());
                        // The destination may be truncated: do not report it as copied.
                        return null;
                }
                finally {
                        closeQuietly(inStr);
                        closeQuietly(out);
                }
                return f2;
        }

        /**
         * Finds the group an identifier belongs to.
         *
         * Although the parameter is declared as a set of strings, each element of
         * {@code alignIds} is used as a {@code Set<String>} of identifiers. Groups are
         * numbered from 1 in the iteration order of {@code alignIds}.
         *
         * @param id the identifier to look for
         * @param alignIds the identifier groups
         * @return the 1-based position of the first group containing {@code id}, as a
         *         string, or {@code "-1"} if no group contains it
         */
        public String searchForAlignId(String id, HashSet<String> alignIds){
                int i = 0;
                for(Set<String> set: alignIds){
                        i++;
                        if(set.contains(id)){
                                return ""+i;
                        }
                }
                return "-1";
        }

}