Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / fleurs / pager.groovy @ 479

History | View | Annotate | Download (8.5 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $
25
// $LastChangedRevision: 3400 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.fleurs;
29

    
30
import org.txm.importer.*;
31
import org.xml.sax.Attributes;
32
import org.txm.importer.filters.*;
33
import java.io.File;
34
import java.io.IOException;
35
import java.util.ArrayList;
36
import javax.xml.parsers.SAXParserFactory;
37
import javax.xml.parsers.ParserConfigurationException;
38
import javax.xml.parsers.SAXParser;
39
import javax.xml.stream.*;
40
import java.net.URL;
41
import org.xml.sax.InputSource;
42
import org.xml.sax.helpers.DefaultHandler;
43

    
44
// TODO: Auto-generated Javadoc
45
/**
 * Builds a simple HTML edition from an XML file read with StAX.
 *
 * Two kinds of output are produced while the input is streamed once:
 * <ul>
 * <li>a single HTML file ({@code outfile}) containing the whole text;</li>
 * <li>one HTML page per {@code <poeme>} element, written to a {@code multi}
 * directory next to {@code outfile} and named
 * {@code <outfile name without its last 5 characters>_<page number>.html}.</li>
 * </ul>
 * Recognised elements are {@code poeme} (new page, title taken from its
 * {@code titre} attribute), {@code vers} and {@code strophe} (line break) and
 * {@code w} (a word, rendered as a {@code <span>} whose {@code title} lists
 * every attribute except {@code id}).
 *
 * The whole conversion runs from the constructor.
 *
 * @author mdecorde
 */
class pager {
    /** Word forms that must not be preceded by a space. */
    List<String> NoSpaceBefore;

    /** Word forms (or final characters of a word) that must not be followed by a space. */
    List<String> NoSpaceAfter;

    /** The wordcount. Never updated by this class; kept for compatibility. */
    int wordcount = 0;

    /** Number of pages opened so far; also the number used in the current page file name. */
    int pagecount = 0;

    /** The wordmax, as given to the constructor. Not read by this class: pages break on poeme elements. */
    int wordmax = 0;

    /** The basename, written upper-cased in the HTML titles. */
    String basename;

    /** The id attribute of the last w element that was opened. */
    String wordid;

    /** True until the first w element of the document has been opened. */
    boolean firstWord = true;

    /** Text accumulated for the w element being read. */
    String wordvalue;

    /** " name:value" pairs of the current w element's attributes, id excepted. */
    String interpvalue;

    /** Form of the previously written word, used to decide whether a space is needed. */
    String lastword = " ";

    /** The wordtype. Not used by this class; kept for compatibility. */
    String wordtype;

    /** True while the parser is inside a w element, i.e. while text must be collected. */
    boolean flagform = false;

    /** The flaginterp. Not used by this class; kept for compatibility. */
    boolean flaginterp = false;

    /** URL built from the input file. */
    private def url;

    /** Stream opened on the input file. */
    private def inputData;

    /** StAX factory used to create the parser. */
    private def factory;

    /** The parser. */
    private XMLStreamReader parser;

    /** Writer of the single-file edition. */
    OutputStreamWriter writer;

    /** Writer of the page currently being built, or null when no page is open. */
    OutputStreamWriter multiwriter = null;

    /** The infile. */
    File infile;

    /** The outfile. */
    File outfile;

    /** The page files, in creation order. */
    ArrayList<File> pages = new ArrayList<File>();

    /** For each page (same order as pages), the id of the first word written on it. */
    ArrayList<String> idxstart = new ArrayList<String>();

    /** True when the current page has not received a word yet, so its idxstart entry is still provisional. */
    private boolean pageStartPending = false;

    /**
     * Instantiates a new pager and immediately converts the input file.
     *
     * @param infile the XML file to read
     * @param outfile the HTML file receiving the whole text
     * @param NoSpaceBefore word forms that must not be preceded by a space
     * @param NoSpaceAfter word forms or final characters that must not be followed by a space
     * @param max stored in wordmax; not used to break pages
     * @param basename the name written in the HTML titles
     */
    pager(File infile, File outfile, List<String> NoSpaceBefore,
            List<String> NoSpaceAfter, int max, String basename) {
        this.basename = basename;
        this.wordmax = max;
        this.NoSpaceBefore = NoSpaceBefore;
        this.NoSpaceAfter = NoSpaceAfter;
        this.url = infile.toURI().toURL();
        this.infile = infile;

        inputData = url.openStream();
        try {
            factory = XMLInputFactory.newInstance();
            parser = factory.createXMLStreamReader(inputData);
            createOutput(outfile);
        } catch (Exception e) {
            // Do not leak the input when the parser or the output cannot be set up.
            closeQuietly(writer);
            closeQuietly(parser);
            closeQuietly(inputData);
            throw e;
        }
        process();
    }

    /**
     * Closes the current page, if any, and opens the next one.
     *
     * The page is registered in pages and a provisional entry (the id of the
     * last word seen so far) is appended to idxstart; that entry is replaced
     * by the id of the first word actually written on the page.
     * I/O errors are propagated instead of being printed and ignored.
     *
     * @return true, always (failures are reported by exception)
     */
    private boolean createNextOutput() {
        if (multiwriter != null) {
            multiwriter.write("</body>");
            multiwriter.write("</html>");
            multiwriter.close();
            multiwriter = null;
        }
        pagecount++;

        // The page name is the output name minus its last 5 characters
        // (presumably ".html"); shorter names are used unchanged.
        String name = outfile.getName();
        String stem = name.length() >= 5 ? name.substring(0, name.length() - 5) : name;

        File parentDir = outfile.getParentFile();
        File multiDir = (parentDir == null) ? new File("multi") : new File(parentDir, "multi");
        multiDir.mkdirs();

        File f = new File(multiDir, stem + "_" + pagecount + ".html");
        multiwriter = new OutputStreamWriter(new FileOutputStream(f), "UTF-8");
        pages.add(f);
        idxstart.add(wordid);
        pageStartPending = true;

        multiwriter.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        multiwriter.write("<html>");
        multiwriter.write("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>");
        multiwriter.write("<head>");
        multiwriter.write("<title>" + basename.toUpperCase() + " Edition - Page " + pagecount + "</title>");
        multiwriter.write("</head>");
        multiwriter.write("<body>");

        return true;
    }

    /**
     * Opens the writer of the single-file edition.
     * I/O errors are propagated instead of being printed and ignored.
     *
     * @param outfile the outfile
     * @return true, always (failures are reported by exception)
     */
    private boolean createOutput(File outfile) {
        this.outfile = outfile;
        writer = new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8");
        return true;
    }

    /**
     * Gets the page files.
     *
     * @return the page files, in creation order
     */
    public ArrayList<File> getPageFiles() {
        return pages;
    }

    /**
     * Gets the idx.
     *
     * @return for each page, the id of its first word
     */
    public ArrayList<String> getIdx() {
        return idxstart;
    }

    /**
     * Streams the input once and writes both the single-file edition and the
     * pages. Every stream is closed when the method returns, whether it
     * succeeds or fails.
     */
    void process() {
        try {
            writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
            writer.write("<html>");
            writer.write("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>");
            writer.write("<head>");
            writer.write("<title>" + basename.toUpperCase() + " edition</title>");
            writer.write("</head>");
            writer.write("<body>");

            while (parser.hasNext()) {
                int event = parser.next();
                switch (event) {
                    case XMLStreamConstants.START_ELEMENT:
                        handleStartElement(parser.getLocalName());
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        handleEndElement(parser.getLocalName());
                        break;
                    case XMLStreamConstants.CHARACTERS:
                        if (flagform && parser.getText().length() > 0) {
                            wordvalue += parser.getText();
                        }
                        break;
                }
            }

            writer.write("</body>");
            writer.write("</html>");
            writer.close();
            // No page exists when the document holds no poeme, vers, strophe or w element.
            if (multiwriter != null) {
                multiwriter.write("</body>");
                multiwriter.write("</html>");
                multiwriter.close();
            }
            inputData.close();
        } finally {
            // Closing twice is harmless; this only matters on the failure path.
            closeQuietly(writer);
            closeQuietly(multiwriter);
            closeQuietly(parser);
            closeQuietly(inputData);
        }
    }

    /** Handles the opening tag of the element named localname. */
    private void handleStartElement(String localname) {
        switch (localname) {
            case "poeme":
                createNextOutput();
                StringBuilder header = new StringBuilder();
                header.append("<h1 class=\"text\">").append(escape(parser.getAttributeValue(null, "titre"))).append("</h1>");
                header.append("<ul>");
                for (int i = 0; i < parser.getAttributeCount(); i++) {
                    header.append("<li>").append(escape(parser.getAttributeLocalName(i)))
                            .append(" : ").append(escape(parser.getAttributeValue(i))).append("</li>");
                }
                header.append("</ul><br/>");
                writeBoth(header.toString());
                break;

            case "vers":
            case "strophe":
                ensurePageOpen();
                writeBoth("<br/>");
                break;

            case "w":
                wordid = parser.getAttributeValue(null, "id");
                ensurePageOpen();
                firstWord = false;
                if (pageStartPending) {
                    // First word of the page: it is the real start index of that page.
                    pageStartPending = false;
                    idxstart.set(idxstart.size() - 1, wordid);
                }
                interpvalue = "";
                for (int i = 0; i < parser.getAttributeCount(); i++) {
                    if (!"id".equals(parser.getAttributeLocalName(i))) {
                        interpvalue += " " + parser.getAttributeLocalName(i) + ":" + parser.getAttributeValue(i);
                    }
                }
                flagform = true;
                wordvalue = "";
                break;
        }
    }

    /** Handles the closing tag of the element named localname; only w produces output. */
    private void handleEndElement(String localname) {
        if (!"w".equals(localname)) {
            return;
        }
        int l = lastword.length();
        String endOfLastWord = (l > 0) ? lastword.substring(l - 1) : "";

        boolean glued = contains(this.NoSpaceBefore, wordvalue) ||
                contains(this.NoSpaceAfter, lastword) ||
                wordvalue.startsWith("-") ||
                contains(this.NoSpaceAfter, endOfLastWord);

        String span = (glued ? "" : " ") +
                "<span title=\"" + escape(interpvalue) + "\" id=\"" + escape(wordid) + "\">" +
                escape(wordvalue) + "</span>";
        writeBoth(span);

        lastword = wordvalue;
        flagform = false;
    }

    /** Opens a first page when content arrives before any poeme element. */
    private void ensurePageOpen() {
        if (multiwriter == null) {
            createNextOutput();
        }
    }

    /** Writes the same markup to the single-file edition and to the current page. */
    private void writeBoth(String markup) {
        writer.write(markup);
        multiwriter.write(markup);
    }

    /** Null-safe membership test; a null list contains nothing. */
    private static boolean contains(List<String> list, String value) {
        return list != null && list.contains(value);
    }

    /** Escapes the characters that are significant in HTML text and attribute values; null gives "". */
    private static String escape(Object value) {
        if (value == null) {
            return "";
        }
        // '&' must be replaced first so the entities added below are not escaped again.
        return value.toString()
                .replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace("\"", "&quot;");
    }

    /** Closes resource if it is not null, ignoring any error: used for clean-up only. */
    private static void closeQuietly(def resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception ignored) {
            // Best effort: the primary error, if any, is the one worth reporting.
        }
    }
}