root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / bfm / pager.groovy @ 1688
History | View | Annotate | Download (15.5 kB)
1 | 321 | mdecorde | // Copyright © 2010-2013 ENS de Lyon.
|
---|---|---|---|
2 | 321 | mdecorde | // Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 | 321 | mdecorde | // Lyon 2, University of Franche-Comté, University of Nice
|
4 | 321 | mdecorde | // Sophia Antipolis, University of Paris 3.
|
5 | 321 | mdecorde | //
|
6 | 321 | mdecorde | // The TXM platform is free software: you can redistribute it
|
7 | 321 | mdecorde | // and/or modify it under the terms of the GNU General Public
|
8 | 321 | mdecorde | // License as published by the Free Software Foundation,
|
9 | 321 | mdecorde | // either version 2 of the License, or (at your option) any
|
10 | 321 | mdecorde | // later version.
|
11 | 321 | mdecorde | //
|
12 | 321 | mdecorde | // The TXM platform is distributed in the hope that it will be
|
13 | 321 | mdecorde | // useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 | 321 | mdecorde | // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 | 321 | mdecorde | // PURPOSE. See the GNU General Public License for more
|
16 | 321 | mdecorde | // details.
|
17 | 321 | mdecorde | //
|
18 | 321 | mdecorde | // You should have received a copy of the GNU General
|
19 | 321 | mdecorde | // Public License along with the TXM platform. If not, see
|
20 | 321 | mdecorde | // http://www.gnu.org/licenses.
|
21 | 321 | mdecorde | |
22 | 321 | mdecorde | //
|
23 | 321 | mdecorde | // This file is part of the TXM platform.
|
24 | 321 | mdecorde | //
|
25 | 321 | mdecorde | // The TXM platform is free software: you can redistribute it and/or modify
|
26 | 321 | mdecorde | // it under the terms of the GNU General Public License as published by
|
27 | 321 | mdecorde | // the Free Software Foundation, either version 3 of the License, or
|
28 | 321 | mdecorde | // (at your option) any later version.
|
29 | 321 | mdecorde | //
|
30 | 321 | mdecorde | // The TXM platform is distributed in the hope that it will be useful,
|
31 | 321 | mdecorde | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
32 | 321 | mdecorde | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
33 | 321 | mdecorde | // GNU General Public License for more details.
|
34 | 321 | mdecorde | //
|
35 | 321 | mdecorde | // You should have received a copy of the GNU General Public License
|
36 | 321 | mdecorde | // along with the TXM platform. If not, see <http://www.gnu.org/licenses/>.
|
37 | 321 | mdecorde | //
|
38 | 321 | mdecorde | //
|
39 | 321 | mdecorde | //
|
40 | 479 | mdecorde | // $LastChangedDate: 2017-05-02 11:55:17 +0200 (mar. 02 mai 2017) $
|
41 | 479 | mdecorde | // $LastChangedRevision: 3436 $
|
42 | 321 | mdecorde | // $LastChangedBy: mdecorde $
|
43 | 321 | mdecorde | //
|
44 | 986 | mdecorde | package org.txm.scripts.importer.bfm;
|
45 | 321 | mdecorde | |
46 | 986 | mdecorde | import org.txm.scripts.importer.*; |
47 | 321 | mdecorde | import org.xml.sax.Attributes; |
48 | 1000 | mdecorde | import org.txm.importer.scripts.filters.*; |
49 | 321 | mdecorde | import java.io.File; |
50 | 321 | mdecorde | import java.io.IOException; |
51 | 321 | mdecorde | import java.util.ArrayList; |
52 | 321 | mdecorde | import javax.xml.parsers.SAXParserFactory; |
53 | 321 | mdecorde | import javax.xml.parsers.ParserConfigurationException; |
54 | 321 | mdecorde | import javax.xml.parsers.SAXParser; |
55 | 321 | mdecorde | import javax.xml.stream.*; |
56 | 321 | mdecorde | import java.net.URL; |
57 | 321 | mdecorde | import org.xml.sax.InputSource; |
58 | 321 | mdecorde | import org.xml.sax.helpers.DefaultHandler; |
59 | 321 | mdecorde | |
60 | 321 | mdecorde | // TODO: Auto-generated Javadoc
|
61 | 321 | mdecorde | /**
|
62 | 321 | mdecorde | * Build BFM texts HTML edition from the TEI-TXM files. <br/>
|
63 | 321 | mdecorde | * The result is similar to the one of the XSLT script made by Alexis Lavrentiev
|
64 | 321 | mdecorde | * (BFM & ICAR3). <br/>
|
65 | 321 | mdecorde | *
|
66 | 321 | mdecorde | * @author mdecorde
|
67 | 321 | mdecorde | */
|
68 | 321 | mdecorde | class pager { |
/** Word forms before which no separating space is written. */
private List<String> NoSpaceBefore;

/** Word forms (or final characters of a word) after which no separating space is written. */
private List<String> NoSpaceAfter;

/** Number of "w" elements read since the current page was opened. */
private int wordcount;

/** Number of pages created so far; also used to number the page files. */
private int pagecount;

/** Word count at which a new page is started. */
private int wordmax;

/** Value of the "id" attribute of the word being read. */
private String wordid;

/** True until the first word of the current page has been seen. */
private boolean firstWord = true;

/** Text accumulated inside the current "form" element. */
private String wordvalue;

/** Tooltip text built from the word "type" attribute and its "ana" elements. */
private String interpvalue;

/** The last word written to the output. */
private String lastword = " ";

/** Not referenced by the code visible in this file. */
private String wordtype;

/** True while the parser is inside a "form" element. */
private boolean flagform;

/** True while the parser is inside an "ana" element. */
private boolean flaginterp;

/** URL of the input file. */
private URL url;

/** Input stream opened on {@link #url}. */
private def inputData;

/** StAX input factory used to create {@link #parser}. */
private def factory;

/** StAX reader over the input file. */
private XMLStreamReader parser;

/** Not referenced by the code visible in this file. */
private OutputStreamWriter writer;

/** Only referenced from commented-out code in this file. */
private OutputStreamWriter multiwriter;

/** Writer of the page currently being produced; null until the first page is created. */
StaxStackWriter pagedWriter;

/** The file being converted. */
private File infile;

/** Not referenced by the code visible in this file. */
private File outfile;

/** Directory receiving the page files. */
private File outDir;

/** Page files created so far, in creation order. */
ArrayList<File> pages = new ArrayList<File>();

/** Id of the first word of each page ("w_0" for a page without words). */
ArrayList<String> idxstart = new ArrayList<String>();

/** Not referenced by the code visible in this file. */
private String titreId;

/** Name written upper-cased at the start of each page title. */
private String basename;

/** Text name, used in the page title and as prefix of the page file names. */
private String textname;

/** Metadata name -> XPath expression, evaluated against the input file. */
private def xpathProperties;
146 | 321 | mdecorde | |
/**
 * Instantiates a new pager and runs the conversion right away: the input
 * stream and the StAX parser are opened on {@code infile} and
 * {@link #process()} is called before the constructor returns.
 *
 * @param infile the file to convert
 * @param outDir the directory in which the HTML page files are created
 * @param textname the text name, used in the page titles and as prefix of the page file names
 * @param NoSpaceBefore the punctuation marks that don't have a space before
 * @param NoSpaceAfter the punctuation marks that don't have a space after
 * @param max the max number of word per page
 * @param basename the name written upper-cased at the start of each page title
 * @param xpathprops metadata name -> XPath expression evaluated against {@code infile}
 */
pager(File infile, File outDir, String textname, List<String> NoSpaceBefore,
		List<String> NoSpaceAfter, int max, String basename, Properties xpathprops) {
	this.wordmax = max;
	this.NoSpaceBefore = NoSpaceBefore;
	this.NoSpaceAfter = NoSpaceAfter;
	this.basename = basename;
	this.textname = textname;
	this.outDir = outDir;
	this.xpathProperties = xpathprops;
	this.url = infile.toURI().toURL();
	this.infile = infile;

	inputData = url.openStream();
	try {
		factory = XMLInputFactory.newInstance();
		parser = factory.createXMLStreamReader(inputData);
		process();
	} catch (Throwable failure) {
		// Make sure the parser and the input stream are released when the
		// conversion fails; closing errors must not hide the original failure.
		try {
			if (parser != null) parser.close();
		} catch (Exception ignored) {
		}
		try {
			inputData.close();
		} catch (Exception ignored) {
		}
		throw failure;
	}
}
173 | 321 | mdecorde | |
/**
 * Finishes the page currently being written, if any.
 *
 * When no word was seen on the page, a placeholder "w_0" span is written
 * and "w_0" is recorded as the page's first word id. All open elements are
 * then closed and the writer is closed.
 *
 * @return a copy of the writer's tag stack taken before the elements were
 *         closed, or an empty list when no page was open
 */
private def closeMultiWriter()
{
	if (pagedWriter == null) {
		return [];
	}

	// Snapshot taken first: writeEndElements() is about to unwind the stack.
	def openTags = pagedWriter.getTagStack().clone();

	if (firstWord) { // there was no words
		this.idxstart.add("w_0")
		pagedWriter.write("<span id=\"w_0\"/>");
	}
	pagedWriter.writeEndElements();
	pagedWriter.close();
	return openTags;
}
190 | 321 | mdecorde | |
/**
 * Closes the current page (if any) and opens the next page file,
 * {@code <textname>_<pagecount>.html} in the output directory.
 *
 * The word counter is reset, the HTML prologue (head, title, body) is
 * written, and the elements that were still open inside the body of the
 * previous page are re-opened in the new one.
 *
 * @return true if the new page was created, false if an exception occurred
 *         (the exception is reported on standard output)
 */
private boolean createNextOutput()
{
	wordcount = 0;
	File pageFile = null;
	try {
		def tags = closeMultiWriter();
		// Drop the leading entries up to and including "body": the prologue
		// below re-creates them. Everything is dropped if "body" is absent.
		while (!tags.isEmpty()) {
			String tag = tags.remove(0)
			if ("body" == tag) {
				break;
			}
		}

		pageFile = new File(outDir, textname+"_"+(++pagecount)+".html")
		pages.add(pageFile);
		firstWord = true; // waiting for next word

		pagedWriter = new StaxStackWriter(pageFile, "UTF-8");

		pagedWriter.writeStartDocument("UTF-8", "1.0")
		pagedWriter.writeStartElement("html")
		pagedWriter.writeStartElement("head")
		// <meta> is only valid inside <head>
		pagedWriter.writeEmptyElement("meta", ["http-equiv":"Content-Type", "content":"text/html","charset":"UTF-8"]);
		pagedWriter.writeStartElement("title")
		pagedWriter.writeCharacters(basename.toUpperCase()+" - "+textname+" - Edition - Page "+pagecount)
		pagedWriter.writeEndElement() // </title>
		pagedWriter.writeEndElement() // </head>
		pagedWriter.writeStartElement("body") //<body>
		pagedWriter.writeStartElements(tags);
		return true;
	} catch (Exception e) {
		// e.toString() carries the exception type even when it has no message
		System.out.println("Failed to create page file "+pageFile+": "+e);
		return false;
	}
}
232 | 321 | mdecorde | |
/**
 * Opens the next page by delegating to {@link #createNextOutput()}.
 *
 * @param outfile not used: the page file name is computed by createNextOutput()
 * @return true, if successful
 */
private boolean createOutput(File outfile) {
	boolean created = false;
	try {
		created = createNextOutput();
	} catch (Exception e) {
		System.out.println(e.getLocalizedMessage());
	}
	return created;
}
247 | 321 | mdecorde | |
/**
 * Gives access to the page files.
 *
 * @return the pages created during the processing, in creation order
 *         (the internal list itself, not a copy)
 */
public ArrayList<File> getPageFiles() {
	return this.pages;
}
256 | 321 | mdecorde | |
/**
 * Gives access to the page start indexes.
 *
 * @return the id of the first word of each page ("w_0" for a page that
 *         contains no word); the internal list itself, not a copy
 */
public ArrayList<String> getIdx() {
	return this.idxstart;
}
265 | 321 | mdecorde | |
/**
 * Processes the infile: reads it with the StAX parser and writes the
 * paginated HTML edition.
 *
 * Metadata values are first resolved by evaluating every XPath expression
 * of the XPath properties against the input file ("N/A" is the fallback
 * value). The first page is then opened and the input is streamed:
 * <ul>
 * <li>"text" writes the title and the metadata block,</li>
 * <li>"pb" starts a new page, as does reaching the per-page word limit,</li>
 * <li>"head", "p", "lg", "l", "lb", "ab" and "div" (type "titre" or
 * "amen") are mapped to HTML headings, paragraphs and line breaks,</li>
 * <li>each "w" is written as a span carrying the word id, a tooltip built
 * from its "type" attribute and "ana" children, and a colour depending on
 * the enclosing corr/reg/foreign/supplied elements.</li>
 * </ul>
 * The parser, the input stream and the last page are closed when the
 * method ends, whether it completes normally or with an exception.
 */
void process() {
	String localname = "";

	boolean closeTitre = false;
	boolean closeAmen = false;

	boolean flagchoice = false;
	boolean flagcorr = false;
	boolean flagsic = false;
	boolean flagreg = false;
	boolean flagorig = false;
	boolean flagforeign = false;
	int levelSupplied = 0;
	boolean flagSurplus = false;
	boolean flagDel = false;

	try {
		// option for metadata from BFM TEI header
		def mValues = [:];
		def xpathprocessor = new XPathResult(infile)
		for (String name : xpathProperties.keySet()) {
			String value = xpathprocessor.getXpathResponse(xpathProperties.get(name), "N/A");
			mValues.put(name, value.replace("\n", " "));
		}
		if (mValues.containsKey("forme") && !mValues.get("forme").equals("N/A")) {
			// strip the "#forme_" (or bare "#") reference prefix
			String forme = mValues.get("forme");
			if (forme.startsWith("#forme_")) {
				mValues.put("forme", forme.substring(7));
			} else if (forme.startsWith("#")) {
				mValues.put("forme", forme.substring(1));
			}
		}

		createNextOutput();
		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
		{
			switch (event) {
				case XMLStreamConstants.START_ELEMENT:
					localname = parser.getLocalName();
					switch (localname) {
						case "text":
							pagedWriter.writeStartElement("h1",
									["align":"center", "style":"color:darkviolet;font-family:Times;font-weight:bold;font-style:italic;font-size:200%", "class":"text"])
							pagedWriter.writeCharacters(mValues.get("titre"))
							pagedWriter.writeEndElement(); // h1
							pagedWriter.writeStartElement("p")
							pagedWriter.writeEmptyElement("hr")
							pagedWriter.writeCharacters("auteur : "+mValues.get("auteur"))
							pagedWriter.writeEmptyElement("br");
							pagedWriter.writeCharacters("date de composition : "+mValues.get("datecompolibre"));
							pagedWriter.writeEmptyElement("br");
							pagedWriter.writeCharacters("domaine : "+mValues.get("domaine"))
							pagedWriter.writeEmptyElement("br");
							pagedWriter.writeCharacters("genre : "+mValues.get("genre"))
							pagedWriter.writeEmptyElement("br");
							pagedWriter.writeCharacters("forme : "+mValues.get("forme"))
							pagedWriter.writeEmptyElement("br");
							pagedWriter.writeCharacters("dialecte : "+mValues.get("dialecte"))
							pagedWriter.writeEmptyElement("br");
							pagedWriter.writeEmptyElement("hr")
							pagedWriter.writeEndElement(); // p
							break;
						case "head":
							pagedWriter.writeStartElement("h2")
							break;
						case "lg":
						case "p":
							pagedWriter.writeStartElement("p")
							break;
						case "pb":
							// a page break in the source always starts a new HTML page
							createNextOutput();
							if (parser.getAttributeValue(null,"n") != null) {
								pagedWriter.writeStartElement("p", ["style":"color:red", "align":"center"]);
								pagedWriter.writeCharacters("- "+parser.getAttributeValue(null,"n")+" -")
								pagedWriter.writeEndElement() // p
							}
							break;

						case "ab":
							if (parser.getAttributeValue(null,"n") != null) {
								pagedWriter.writeStartElement("p", ["align":"center"]);
								pagedWriter.writeCharacters(parser.getAttributeValue(null,"n"))
								pagedWriter.writeEndElement() // p
							}
							break;
						case "l":
							pagedWriter.writeEmptyElement("br");
							break;
						case "div":
							if (parser.getAttributeValue(null,"type") == "titre") {
								pagedWriter.writeStartElement("h3")
								closeTitre = true;
							} else if (parser.getAttributeValue(null,"type") == "amen") {
								pagedWriter.writeStartElement("h3")
								closeAmen = true;
							}
							break;
						case "w":
							interpvalue = "";
							for (int i = 0 ; i < parser.getAttributeCount(); i++) {
								if (parser.getAttributeLocalName(i).equals("type")) {
									interpvalue = " pos: "+parser.getAttributeValue(i);
								}
							}
							wordid = parser.getAttributeValue(null, "id");

							wordcount++;
							if (wordcount >= wordmax) {
								// page is full: the word goes on a fresh page
								createNextOutput();
							}

							if (firstWord) {
								// remember the id of the first word of the page
								firstWord = false;
								this.idxstart.add(wordid);
							}
							break;

						case "choice":
							flagchoice = true;
							break;
						case "corr":
							flagcorr = true;
							break;
						case "sic":
							flagsic = true;
							break;
						case "reg":
							flagreg = true;
							break;
						case "orig":
							flagorig = true;
							break;
						case "foreign":
							flagforeign = true;
							break;
						case "supplied":
							levelSupplied = levelSupplied + 1;
							if (flagform) {
								wordvalue = wordvalue+"[";
							}
							break;

						case "surplus":
							flagSurplus = true;
							pagedWriter.writeStartElement("span", ["class": "surplus", "style":"color:red;"])
							break;

						case "del":
							flagDel = true;
							pagedWriter.writeStartElement("span", ["class": "del", "style":"color:red;text-decoration:line-through;"])
							break;

						case "ana":
							flaginterp = true;
							// the type value is written without its first character;
							// a missing or empty type contributes an empty name
							String anaType = parser.getAttributeValue(null,"type");
							String anaName = (anaType != null && anaType.length() > 0) ? anaType.substring(1) : "";
							interpvalue += " "+anaName+":"
							break;

						case "form":
							wordvalue = "";
							flagform = true;
							break;
					}
					break;
				case XMLStreamConstants.END_ELEMENT:
					localname = parser.getLocalName();
					switch (localname) {
						case "head":
							pagedWriter.writeEndElement() // h2
							break;

						case "div":
							if (closeTitre) {
								pagedWriter.writeEndElement() // h3
								closeTitre = false;
							} else if (closeAmen) {
								pagedWriter.writeEndElement() // h3
								// reset the flag that was actually set, otherwise every
								// following </div> would close one more element
								closeAmen = false;
							}
							break;

						case "lb":
							pagedWriter.writeEmptyElement("br")
							break;
						case "lg":
						case "p":
							pagedWriter.writeEndElement() // p
							break;

						case "choice":
							flagchoice = false;
							break;
						case "corr":
							flagcorr = false;
							break;
						case "sic":
							flagsic = false;
							break;
						case "reg":
							flagreg = false;
							break;
						case "orig":
							flagorig = false;
							break;
						case "foreign":
							flagforeign = false;
							break;
						case "supplied":
							levelSupplied = levelSupplied - 1;
							if (flagform) {
								wordvalue = wordvalue+"]";
							}
							break;
						case "surplus":
							flagSurplus = false;
							// NOTE(review): a ")" is written here but no "(" is written
							// when the element starts - confirm whether this is intended
							pagedWriter.writeCharacters(")")
							pagedWriter.writeEndElement() // span
							break;

						case "del":
							flagDel = false;
							pagedWriter.writeEndElement() // span
							break;

						case "form":
							flagform = false
							break;

						case "ana":
							flaginterp = false
							break;

						case "w":
							int l = lastword.length();
							String color = "";
							if (flagcorr) {
								color = "color: green;";
							} else if (flagreg) {
								color = "color: darkgreen;";
							} else if (flagforeign) {
								color = "color: darkred;";
							} else if (levelSupplied == 1) {
								color = "color: blue;";
							} else if (levelSupplied == 2) {
								color = "color: darkblue;";
							}

							// inside a <choice>, only the corr/reg alternative is written
							if (!flagchoice || flagcorr || flagreg) {
								String endOfLastWord = "";
								if (l > 0) {
									endOfLastWord = lastword.substring(l - 1);
								}

								boolean glued = NoSpaceBefore.contains(wordvalue) ||
										NoSpaceAfter.contains(lastword) ||
										wordvalue.startsWith("-") ||
										NoSpaceAfter.contains(endOfLastWord);
								if (!glued) {
									pagedWriter.writeCharacters(" ")
								}

								pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid, "style":color]) // span
								pagedWriter.writeCharacters(wordvalue)
								pagedWriter.writeEndElement() // span
								lastword = wordvalue;
							}
							break;
					}
					break;

				case XMLStreamConstants.CHARACTERS:
					if (flagform && parser.getText().length() > 0) {
						wordvalue += (parser.getText());
					}
					if (flaginterp && parser.getText().length() > 0) {
						interpvalue += (parser.getText());
					}
					break;
			}
		}
	} finally {
		// Release everything even when the conversion fails; each step runs
		// even if the previous one throws.
		try {
			if (parser != null) parser.close();
		} finally {
			try {
				if (inputData != null) inputData.close();
			} finally {
				closeMultiWriter();
			}
		}
	}
}
549 | 321 | mdecorde | } |