Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / xtz / XTZDefaultPagerStep.groovy @ 1688

History | View | Annotate | Download (16.8 kB)

1 986 mdecorde
package org.txm.scripts.importer.xtz;
2 321 mdecorde
3 321 mdecorde
import java.io.File;
4 321 mdecorde
import java.io.OutputStreamWriter;
5 321 mdecorde
import java.util.ArrayList;
6 321 mdecorde
import java.util.List;
7 321 mdecorde
8 321 mdecorde
import javax.xml.stream.*;
9 321 mdecorde
10 986 mdecorde
import org.txm.scripts.importer.StaxStackWriter;
11 1064 mdecorde
import org.eclipse.ui.part.PageSwitcher
12 1000 mdecorde
import org.txm.importer.xtz.*
13 321 mdecorde
14 321 mdecorde
public class XTZDefaultPagerStep {
15 321 mdecorde
16 321 mdecorde
        List<String> NoSpaceBefore;
17 321 mdecorde
18 321 mdecorde
        /** The No space after. */
19 321 mdecorde
        List<String> NoSpaceAfter;
20 321 mdecorde
21 321 mdecorde
        /** The wordcount. */
22 321 mdecorde
        int wordcount = 0;
23 321 mdecorde
24 321 mdecorde
        /** The pagecount. */
25 321 mdecorde
        int pagecount = 0;
26 321 mdecorde
27 321 mdecorde
        /** The wordmax. */
28 321 mdecorde
        int wordmax = 0;
29 321 mdecorde
30 321 mdecorde
        /** The basename. */
31 321 mdecorde
        String basename = "";
32 321 mdecorde
        String txtname = "";
33 321 mdecorde
        File outdir;
34 321 mdecorde
35 321 mdecorde
        /** The wordid. */
36 321 mdecorde
        String wordid;
37 321 mdecorde
38 321 mdecorde
        /** The first word. */
39 321 mdecorde
        boolean firstWord = true;
40 321 mdecorde
41 321 mdecorde
        /** The wordvalue. */
42 321 mdecorde
        String wordvalue = "";
43 321 mdecorde
44 321 mdecorde
        /** The interpvalue. */
45 321 mdecorde
        String interpvalue = "";
46 321 mdecorde
47 321 mdecorde
        /** The lastword. */
48 321 mdecorde
        String lastword = " ";
49 321 mdecorde
50 321 mdecorde
        /** The wordtype. */
51 321 mdecorde
        String wordtype;
52 321 mdecorde
53 321 mdecorde
        /** The flagform. */
54 321 mdecorde
        boolean flagform = false;
55 321 mdecorde
56 321 mdecorde
        /** The flaginterp. */
57 321 mdecorde
        boolean flaginterp = false;
58 321 mdecorde
59 321 mdecorde
        /** The url. */
60 321 mdecorde
        private def url;
61 321 mdecorde
62 321 mdecorde
        /** The input data. */
63 321 mdecorde
        private def inputData;
64 321 mdecorde
65 321 mdecorde
        /** The factory. */
66 321 mdecorde
        private def factory;
67 321 mdecorde
68 321 mdecorde
        /** The parser. */
69 321 mdecorde
        private XMLStreamReader parser;
70 321 mdecorde
71 321 mdecorde
        /** The writer. */
72 321 mdecorde
        OutputStreamWriter writer;
73 321 mdecorde
74 321 mdecorde
        /** The pagedWriter. */
75 321 mdecorde
        StaxStackWriter pagedWriter = null;
76 321 mdecorde
77 321 mdecorde
        /** The infile. */
78 321 mdecorde
        File infile;
79 321 mdecorde
80 321 mdecorde
        /** The outfile. */
81 321 mdecorde
        File outfile;
82 321 mdecorde
83 321 mdecorde
        /** The pages. */
84 698 mdecorde
        //TODO enhance this to store the page name/id as well
85 321 mdecorde
        ArrayList<File> pages = new ArrayList<File>();
86 321 mdecorde
87 321 mdecorde
        /** The idxstart. */
88 321 mdecorde
        ArrayList<String> idxstart = new ArrayList<String>();
89 321 mdecorde
        String paginationElement;
90 321 mdecorde
        def cssList;
91 321 mdecorde
        def wordTag = "w";
92 321 mdecorde
        def noteElements = new HashSet<String>();
93 321 mdecorde
        def outOfTextElements = new HashSet<String>();
94 321 mdecorde
        XTZPager pager;
95 1619 mdecorde
96 321 mdecorde
        /**
         * Instantiates a new pager step for one text.
         *
         * Copies the pagination settings from the pager (page element, corpus name,
         * output directory, words per page, word tag), creates the output directory,
         * opens a StAX reader on the input file and loads the "Note" and
         * "OutSideTextTagsAndKeepContent" element-name lists from the project.
         *
         * The input stream opened here is not closed by the constructor, and
         * process() is not called: the caller has to invoke it explicitly.
         *
         * @param pager the pager providing the pagination parameters and the import module
         * @param infile the XML file to paginate
         * @param txtname the text name, used as prefix of the page file names
         * @param NoSpaceBefore the no space before
         * @param NoSpaceAfter the no space after
         * @param cssList the CSS references linked from each page
         */
        public XTZDefaultPagerStep(XTZPager pager, File infile, String txtname, List<String> NoSpaceBefore,
        List<String> NoSpaceAfter, def cssList) {
                this.pager = pager;
                this.paginationElement = pager.page_element;
                this.cssList = cssList;
                this.basename = pager.corpusname;
                this.txtname = txtname;
                this.outdir = pager.outputDirectory;
                this.wordmax = pager.wordsPerPage;
                this.NoSpaceBefore = NoSpaceBefore;
                this.NoSpaceAfter = NoSpaceAfter;
                this.url = infile.toURI().toURL();
                this.infile = infile;
                this.wordTag= pager.wordTag;
                outdir.mkdirs()

                inputData = new BufferedInputStream(url.openStream());
                factory = XMLInputFactory.newInstance();
                parser = factory.createXMLStreamReader(inputData);

                // The lists are comma separated; entries are trimmed and blank ones dropped
                // because an element local name can neither be empty nor contain spaces.
                String notesListString = pager.getImportModule().getProject().getTextualPlan("Note")
                if (notesListString != null) {
                        for (String rawName : notesListString.split(",")) {
                                String elementName = rawName.trim()
                                if (elementName.length() > 0) noteElements << elementName
                        }
                }

                String elems = pager.getImportModule().getProject().getTextualPlan("OutSideTextTagsAndKeepContent")
                if (elems != null) {
                        for (String rawName : elems.split(",")) {
                                String elementName = rawName.trim()
                                if (elementName.length() > 0) outOfTextElements << elementName
                        }
                }
        }
134 1619 mdecorde
135 479 mdecorde
        /**
         * Finds an attribute of the element the parser is positioned on, by local name.
         *
         * @param parser the reader to query; it must answer getAttributeCount(),
         *               getAttributeLocalName(int) and getAttributeValue(int)
         * @param ns not used: the namespace is not taken into account
         * @param name the local name of the attribute
         * @return the value of the first attribute with that local name,
         *         or the empty string (never null) when there is none
         */
        public String getAttributeValue(def parser, String ns, String name) {
                int index = 0
                while (index < parser.getAttributeCount()) {
                        if (parser.getAttributeLocalName(index) == name) {
                                return parser.getAttributeValue(index).toString()
                        }
                        index++
                }
                return ""
        }
143 321 mdecorde
144 321 mdecorde
        /**
         * Finishes the page currently being written, if there is one.
         *
         * When no word was met on the page, a placeholder "${wordTag}_0" id is
         * registered in idxstart and an empty span carrying that id is written.
         * After writeEndElements() the pending notes are appended (an "hr" with id
         * "notes", then each note preceded by a numbered "note_N" anchor) and the
         * notes list is emptied. The writer is then closed.
         *
         * @return a copy of the tag stack taken before the page was closed, with its
         *         first entries removed up to and including the first "div";
         *         an empty list when no page writer is open
         */
        private def closeMultiWriter() {
                if (pagedWriter == null) {
                        return [];
                }

                // snapshot taken before writeEndElements() is called
                def openTags = pagedWriter.getTagStack().clone();

                if (firstWord) { // there was no words
                        pagedWriter.writeCharacters("");
                        this.idxstart.add("${wordTag}_0")
                        pagedWriter.write("<span id=\"${wordTag}_0\"/>");
                }
                pagedWriter.writeEndElements();

                // write notes
                if (notes.size() > 0) {
                        pagedWriter.writeEmptyElement("hr", ["id":"notes", "width":"20%", "align":"left"]);
                        int number = 1;
                        for (String note : notes) {
                                pagedWriter.writeStartElement("a", ["href":"#noteref_"+number, "name":"note_"+number]);
                                pagedWriter.writeStartElement("sup")
                                pagedWriter.writeCharacters(""+number)
                                pagedWriter.writeEndElement() // </sup>
                                pagedWriter.writeEndElement() // </a>
                                pagedWriter.writeCharacters(note)
                                pagedWriter.writeEmptyElement("br")
                                number++;
                        }
                        notes.clear()
                }

                pagedWriter.close();

                // remove elements until first "div" tag (that "div" is removed too)
                while (openTags.size() > 0) {
                        String tag = openTags.remove(0)
                        if (tag == "div") {
                                break;
                        }
                }

                return openTags;
        }
196 321 mdecorde
197 321 mdecorde
        /**
         * Closes the current page, if any, and starts the next HTML page file.
         *
         * The word counter is reset, the page counter is incremented and the new file
         * "<txtname>_<pagecount>.html" of the output directory is registered in the
         * pages list. The page prologue is written (doctype, html, head with the meta,
         * the stylesheet links and the title, then body and the "txmeditionpage" div)
         * and the tags returned by closeMultiWriter() are opened again in the new page.
         *
         * @return true, if successful; false when an exception was raised, in which
         *         case its message and stack trace are printed
         */
        private boolean createNextOutput() {
                wordcount = 0;
                try {
                        def tags = closeMultiWriter();

                        outfile = new File(outdir, txtname+"_"+(++pagecount)+".html")
                        pages.add(outfile);
                        firstWord = true; // waiting for next word

                        pagedWriter = new StaxStackWriter(outfile, "UTF-8");

                        pagedWriter.writeDTD("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">")
                        pagedWriter.writeCharacters("\n")
                        pagedWriter.writeStartElement("html");
                        pagedWriter.writeCharacters("\n")
                        // meta and link are only allowed inside head, so head is opened first
                        pagedWriter.writeStartElement("head");
                        pagedWriter.writeEmptyElement("meta", ["http-equiv":"Content-Type", "content":"text/html","charset":"UTF-8"]);
                        for (String css : cssList) {
                                pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"$css"]);
                        }
                        pagedWriter.writeStartElement("title")
                        pagedWriter.writeCharacters(basename.toUpperCase()+" Edition - Page "+pagecount)
                        pagedWriter.writeEndElement(); // </title>
                        pagedWriter.writeEndElement() // </head>
                        pagedWriter.writeCharacters("\n")
                        pagedWriter.writeStartElement("body") //<body>
                        pagedWriter.writeStartElement("div", ["class": "txmeditionpage"]) //<div>
                        // reopen the elements that were still open in the previous page
                        pagedWriter.writeStartElements(tags);
                        return true;
                } catch (Exception e) {
                        System.out.println(e.getLocalizedMessage());
                        e.printStackTrace()
                        return false;
                }
        }
239 321 mdecorde
240 321 mdecorde
        /**
         * Creates the output: delegates to createNextOutput().
         *
         * No exception handling is needed here: createNextOutput() catches every
         * Exception itself, prints it and reports the failure through its result.
         *
         * @return true, if successful
         */
        private boolean createOutput() {
                return createNextOutput();
        }
254 321 mdecorde
255 321 mdecorde
        /**
         * Gives access to the page files registered so far.
         *
         * @return the list held by this step (not a copy), one file per created page
         */
        public ArrayList<File> getPageFiles() {
                return this.pages;
        }
263 321 mdecorde
264 321 mdecorde
        /**
         * Gives access to the ids registered as first word of each page.
         *
         * @return the list held by this step (not a copy)
         */
        public ArrayList<String> getIdx() {
                return this.idxstart;
        }
272 321 mdecorde
273 321 mdecorde
        /**
         * Advances the parser until the end tag of the "teiHeader" element has been
         * read, or until the end of the document when there is no such end tag.
         */
        private void goToText() {
                int event = parser.next();
                while (event != XMLStreamConstants.END_DOCUMENT) {
                        if (event == XMLStreamConstants.END_ELEMENT && "teiHeader".equals(parser.getLocalName())) {
                                return;
                        }
                        event = parser.next();
                }
        }
283 321 mdecorde
284 321 mdecorde
        def notes = []
285 321 mdecorde
        def currentOutOfTextElements = [] // stack of element with out of text to edit opened element
286 321 mdecorde
        def writeOutOfTextToEditText = false
287 321 mdecorde
        /**
288 321 mdecorde
         * Process.
289 321 mdecorde
         */
290 321 mdecorde
        public boolean process() {
291 321 mdecorde
292 1619 mdecorde
                try {
293 1619 mdecorde
                        boolean flagNote = false;
294 1619 mdecorde
                        String noteContent = "";
295 1619 mdecorde
                        String rend = ""
296 1619 mdecorde
                        goToText();
297 321 mdecorde
298 1619 mdecorde
                        String localname = "";
299 1619 mdecorde
                        if (!createNextOutput()) {
300 1619 mdecorde
                                return false;
301 1619 mdecorde
                        }
302 1619 mdecorde
303 1619 mdecorde
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
304 1619 mdecorde
                                rend = "";
305 1619 mdecorde
                                switch (event) {
306 1619 mdecorde
                                        case XMLStreamConstants.START_ELEMENT:
307 1619 mdecorde
                                                localname = parser.getLocalName();
308 1619 mdecorde
                                                if (outOfTextElements.contains(localname)) {
309 1619 mdecorde
                                                        currentOutOfTextElements << localname
310 1619 mdecorde
                                                        writeOutOfTextToEditText = true;
311 1619 mdecorde
                                                } else if (currentOutOfTextElements.size() > 0) {
312 1619 mdecorde
                                                        currentOutOfTextElements << localname
313 479 mdecorde
                                                }
314 1619 mdecorde
315 1619 mdecorde
                                                if (localname == paginationElement) {
316 1619 mdecorde
                                                        createNextOutput();
317 1619 mdecorde
                                                        wordcount=0;
318 321 mdecorde
                                                        pagedWriter.write("\n");
319 1619 mdecorde
                                                        if (getAttributeValue(parser, null,"n") != null) {
320 1619 mdecorde
                                                                pagedWriter.writeElement("p", ["class":"txmeditionpb", "align":"center"], getAttributeValue(parser, null,"n"))
321 321 mdecorde
                                                        }
322 1619 mdecorde
                                                }
323 321 mdecorde
324 1619 mdecorde
                                                rend = getAttributeValue(parser, null, "rend")
325 1619 mdecorde
                                                if (rend == null) rend = "";
326 1619 mdecorde
                                                switch (localname) {
327 1619 mdecorde
                                                        case "text":
328 1619 mdecorde
                                                                LinkedHashMap attributes = new LinkedHashMap();
329 1619 mdecorde
                                                                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
330 1619 mdecorde
                                                                        attributes[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i).toString()
331 1619 mdecorde
                                                                }
332 321 mdecorde
333 1619 mdecorde
                                                                pagedWriter.write("\n");
334 1619 mdecorde
                                                                pagedWriter.writeStartElement("p")
335 1619 mdecorde
                                                                pagedWriter.writeAttribute("class", rend);
336 1619 mdecorde
                                                                if (attributes.containsKey("id")) {
337 1619 mdecorde
                                                                        pagedWriter.writeElement("h3", attributes["id"])
338 1619 mdecorde
                                                                }
339 1619 mdecorde
340 1619 mdecorde
                                                                pagedWriter.writeStartElement("table");
341 1619 mdecorde
                                                                for (String k : attributes.keySet()) {
342 1619 mdecorde
                                                                        if (k == "id") continue;
343 1619 mdecorde
                                                                        if (k == "rend") continue;
344 1619 mdecorde
345 1619 mdecorde
                                                                        pagedWriter.writeStartElement("tr");
346 1619 mdecorde
                                                                        pagedWriter.writeElement("td", k);
347 1619 mdecorde
                                                                        pagedWriter.writeElement("td", attributes[k]);
348 1619 mdecorde
                                                                        pagedWriter.writeEndElement(); //tr
349 1619 mdecorde
                                                                }
350 1619 mdecorde
                                                                pagedWriter.writeEndElement() // table
351 1619 mdecorde
                                                                pagedWriter.writeEndElement() // p
352 1619 mdecorde
                                                                pagedWriter.writeCharacters("\n");
353 1619 mdecorde
                                                                break;
354 1619 mdecorde
                                                        case "ref":
355 1619 mdecorde
                                                                pagedWriter.writeStartElement("a")
356 1619 mdecorde
                                                                pagedWriter.writeAttribute("href", getAttributeValue(parser, null, "target"));
357 1619 mdecorde
                                                                pagedWriter.writeAttribute("target", "_blank");
358 1619 mdecorde
                                                                pagedWriter.writeAttribute("class", rend);
359 1619 mdecorde
                                                                break;
360 1619 mdecorde
                                                        case "head":
361 1619 mdecorde
                                                                pagedWriter.write("\n");
362 1619 mdecorde
                                                                pagedWriter.writeStartElement("h2", ["class":rend])
363 1619 mdecorde
                                                                break;
364 1619 mdecorde
                                                        case "graphic":
365 1619 mdecorde
                                                                pagedWriter.write("\n");
366 1619 mdecorde
                                                                String url = getAttributeValue(parser, null, "url")
367 1619 mdecorde
                                                                if (url != null) {
368 1619 mdecorde
                                                                        // TEI <graphic rend="left-image" url="image.png"/> -> <center class="left-image"><img href="image.png"/></center> + <moncorpus>.css avec rule ".left-image"
369 1619 mdecorde
                                                                        pagedWriter.writeStartElement("center", ["class":rend]) // css -> .<rend> { ... } styles OR
370 1619 mdecorde
                                                                        pagedWriter.writeEmptyElement("img", ["src":url, "align":"middle"])
371 1619 mdecorde
                                                                        pagedWriter.writeEndElement() // center
372 1619 mdecorde
                                                                }
373 1619 mdecorde
                                                                break;
374 1619 mdecorde
                                                        case "table":
375 1619 mdecorde
                                                                pagedWriter.writeStartElement("table", ["class":rend])
376 1619 mdecorde
                                                                pagedWriter.write("\n");
377 1619 mdecorde
                                                                break;
378 1619 mdecorde
                                                        case "row":
379 1619 mdecorde
                                                                pagedWriter.writeStartElement("tr", ["class":rend])
380 1619 mdecorde
                                                                break;
381 1619 mdecorde
                                                        case "cell":
382 1619 mdecorde
                                                                pagedWriter.writeStartElement("td", ["class":rend])
383 1619 mdecorde
                                                                break;
384 1619 mdecorde
                                                        case "list":
385 1619 mdecorde
                                                                String type = getAttributeValue(parser, null,"type");
386 1619 mdecorde
                                                                if ("unordered" == type) {
387 1619 mdecorde
                                                                        pagedWriter.writeStartElement("ul", ["class":rend])
388 1619 mdecorde
                                                                } else {
389 1619 mdecorde
                                                                        pagedWriter.writeStartElement("ol", ["class":rend])
390 1619 mdecorde
                                                                }
391 1619 mdecorde
                                                                break
392 1619 mdecorde
                                                        case "item":
393 1619 mdecorde
                                                                pagedWriter.writeStartElement("li", ["class":rend])
394 1619 mdecorde
                                                                break;
395 1619 mdecorde
                                                        case "hi":
396 1619 mdecorde
                                                        case "emph":
397 1619 mdecorde
                                                                if ("i".equals(rend) || "italic".equals(rend)) {
398 321 mdecorde
                                                                        pagedWriter.writeStartElement("i", ["class":rend])
399 1619 mdecorde
                                                                } else if ("b".equals(rend) || "bold".equals(rend)) {
400 321 mdecorde
                                                                        pagedWriter.writeStartElement("b", ["class":rend])
401 1619 mdecorde
                                                                } else {
402 1619 mdecorde
                                                                        if ("emph".equals(localname)) {
403 1619 mdecorde
                                                                                pagedWriter.writeStartElement("i", ["class":rend])
404 1619 mdecorde
                                                                        } else { // hi
405 1619 mdecorde
                                                                                pagedWriter.writeStartElement("b", ["class":rend])
406 1619 mdecorde
                                                                        }
407 321 mdecorde
                                                                }
408 1619 mdecorde
                                                                break;
409 1619 mdecorde
                                                        case "p":
410 1619 mdecorde
                                                        //case "lg":
411 1619 mdecorde
                                                                pagedWriter.write("\n");
412 1619 mdecorde
                                                                pagedWriter.writeStartElement("p", ["class":rend])
413 1619 mdecorde
                                                                break;
414 1619 mdecorde
                                                        case "div":
415 1619 mdecorde
                                                        case "div1":
416 1619 mdecorde
                                                        case "div2":
417 1619 mdecorde
                                                        case "div3":
418 1619 mdecorde
                                                        case "div4":
419 1619 mdecorde
                                                        case "div5":
420 1619 mdecorde
                                                                pagedWriter.writeStartElement("div", ["class":rend, "type":localname])
421 1619 mdecorde
                                                                break;
422 1619 mdecorde
                                                        case "lb":
423 1619 mdecorde
                                                        //case "l":
424 1619 mdecorde
                                                                pagedWriter.writeEmptyElement("br", ["class":rend])
425 1619 mdecorde
                                                                break;
426 1619 mdecorde
                                                        case wordTag:
427 1619 mdecorde
                                                                wordid = getAttributeValue(parser, null,"id");
428 321 mdecorde
429 1619 mdecorde
                                                                wordcount++;
430 1619 mdecorde
                                                                if (wordcount >= wordmax) {
431 1619 mdecorde
                                                                        createNextOutput();
432 1619 mdecorde
                                                                }
433 321 mdecorde
434 1619 mdecorde
                                                                if (firstWord) {
435 1619 mdecorde
                                                                        firstWord = false;
436 1619 mdecorde
                                                                        this.idxstart.add(wordid);
437 1619 mdecorde
                                                                }
438 321 mdecorde
439 1619 mdecorde
                                                                break;
440 1619 mdecorde
                                                        case "ana":
441 1619 mdecorde
                                                                flaginterp=true;
442 1619 mdecorde
                                                                interpvalue+=" "+getAttributeValue(parser, null, "type").substring(1)+":"
443 1619 mdecorde
                                                                break;
444 1619 mdecorde
                                                        case "form":
445 1619 mdecorde
                                                                wordvalue="";
446 1619 mdecorde
                                                                interpvalue ="";
447 1619 mdecorde
                                                                flagform=true;
448 1619 mdecorde
                                                                break;
449 1619 mdecorde
                                                        default:
450 1619 mdecorde
                                                                if (noteElements.contains(localname)) {
451 1619 mdecorde
                                                                        flagNote = true;
452 1619 mdecorde
                                                                        noteContent = ""
453 321 mdecorde
                                                                }
454 1619 mdecorde
                                                        //                                                         else {
455 1619 mdecorde
                                                        //                                                                pagedWriter.writeStartElement("span", ["class":localname])
456 1619 mdecorde
                                                        //                                                        }
457 1619 mdecorde
                                                                break;
458 1619 mdecorde
                                                }
459 1619 mdecorde
                                                break;
460 1619 mdecorde
                                        case XMLStreamConstants.END_ELEMENT:
461 1619 mdecorde
                                                localname = parser.getLocalName();
462 1619 mdecorde
                                                if (currentOutOfTextElements.size() > 0) currentOutOfTextElements.pop()
463 1619 mdecorde
                                                writeOutOfTextToEditText = currentOutOfTextElements.size() > 0
464 1619 mdecorde
465 1619 mdecorde
                                                switch (localname) {
466 1619 mdecorde
                                                        case "text":
467 1619 mdecorde
                                                                break;
468 1619 mdecorde
                                                        case "p":
469 1619 mdecorde
                                                        //case "lg":
470 1619 mdecorde
                                                                pagedWriter.writeEndElement() // </p>
471 1619 mdecorde
                                                                pagedWriter.write("\n");
472 1619 mdecorde
                                                                break;
473 1619 mdecorde
                                                        case "div":
474 1619 mdecorde
                                                        case "div1":
475 1619 mdecorde
                                                        case "div2":
476 1619 mdecorde
                                                        case "div3":
477 1619 mdecorde
                                                        case "div4":
478 1619 mdecorde
                                                        case "div5":
479 1619 mdecorde
                                                                pagedWriter.writeEndElement() // </div>
480 1619 mdecorde
                                                                pagedWriter.write("\n");
481 1619 mdecorde
                                                                break;
482 1619 mdecorde
                                                        case "head":
483 1619 mdecorde
                                                                pagedWriter.writeEndElement() // </h2>
484 1619 mdecorde
                                                                pagedWriter.write("\n");
485 1619 mdecorde
                                                                break;
486 1619 mdecorde
                                                        case "list":
487 1619 mdecorde
                                                                pagedWriter.writeEndElement(); // ul or ol
488 1619 mdecorde
                                                                pagedWriter.write("\n");
489 1619 mdecorde
                                                                break
490 1619 mdecorde
                                                        case "item":
491 1619 mdecorde
                                                                pagedWriter.writeEndElement(); // li
492 1619 mdecorde
                                                                pagedWriter.write("\n");
493 1619 mdecorde
                                                                break;
494 1619 mdecorde
                                                        case "hi":
495 1619 mdecorde
                                                                pagedWriter.writeEndElement(); // b
496 1619 mdecorde
                                                                break;
497 1619 mdecorde
                                                        case "emph":
498 1619 mdecorde
                                                                pagedWriter.writeEndElement(); // i
499 1619 mdecorde
                                                                break;
500 1619 mdecorde
                                                        case "table":
501 1619 mdecorde
                                                                pagedWriter.writeEndElement(); // table
502 1619 mdecorde
                                                                pagedWriter.write("\n");
503 1619 mdecorde
                                                                break;
504 1619 mdecorde
                                                        case "row":
505 1619 mdecorde
                                                                pagedWriter.writeEndElement(); // tr
506 1619 mdecorde
                                                                break;
507 1619 mdecorde
                                                        case "cell":
508 1619 mdecorde
                                                                pagedWriter.writeEndElement(); // td
509 1619 mdecorde
                                                                break;
510 1619 mdecorde
                                                        case "ref":
511 1619 mdecorde
                                                                pagedWriter.writeEndElement() // </a>
512 1619 mdecorde
                                                                break;
513 1619 mdecorde
                                                        case "form":
514 1619 mdecorde
                                                                flagform = false
515 1619 mdecorde
                                                                break;
516 1619 mdecorde
                                                        case "ana":
517 1619 mdecorde
                                                                flaginterp = false
518 1619 mdecorde
                                                                break;
519 1619 mdecorde
                                                        case wordTag:
520 1619 mdecorde
                                                                int l = lastword.length();
521 1619 mdecorde
                                                                String endOfLastWord = "";
522 1619 mdecorde
                                                                if (l > 0)
523 1619 mdecorde
                                                                        endOfLastWord = lastword.subSequence(l-1, l);
524 1619 mdecorde
525 1619 mdecorde
                                                                if (interpvalue != null)
526 1619 mdecorde
                                                                        interpvalue = interpvalue;
527 1619 mdecorde
528 1619 mdecorde
                                                                if (NoSpaceBefore.contains(wordvalue) ||
529 1619 mdecorde
                                                                NoSpaceAfter.contains(lastword) ||
530 1619 mdecorde
                                                                wordvalue.startsWith("-") ||
531 1619 mdecorde
                                                                NoSpaceAfter.contains(endOfLastWord)) {
532 1619 mdecorde
                                                                        pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid]);
533 1619 mdecorde
                                                                } else {
534 1619 mdecorde
                                                                        pagedWriter.writeCharacters("\n");
535 1619 mdecorde
                                                                        pagedWriter.writeStartElement("span", ["title":interpvalue, "id":wordid]);
536 1619 mdecorde
                                                                }
537 1619 mdecorde
538 1619 mdecorde
                                                                pagedWriter.writeCharacters(wordvalue);
539 1619 mdecorde
                                                                pagedWriter.writeEndElement();
540 1619 mdecorde
                                                                //pagedWriter.writeComment("\n")
541 1619 mdecorde
                                                                lastword=wordvalue;
542 1619 mdecorde
                                                                break;
543 1619 mdecorde
                                                        default:
544 1619 mdecorde
                                                                if (noteElements.contains(localname)) {
545 1619 mdecorde
                                                                        flagNote = false;
546 1619 mdecorde
                                                                        if (noteContent.length() > 0) {
547 1619 mdecorde
                                                                                notes << noteContent;
548 1619 mdecorde
                                                                                pagedWriter.writeStartElement("a", ["href":"#note_"+notes.size(), "name":"noteref_"+notes.size(), "title":noteContent]);
549 1619 mdecorde
                                                                                pagedWriter.writeStartElement("sup")
550 1619 mdecorde
                                                                                pagedWriter.writeCharacters(""+notes.size())
551 1619 mdecorde
                                                                                pagedWriter.writeEndElement() // </sup>
552 1619 mdecorde
                                                                                pagedWriter.writeEndElement() // </a>
553 1619 mdecorde
                                                                        }
554 1619 mdecorde
                                                                }
555 1619 mdecorde
                                                        //                                                        else {
556 1619 mdecorde
                                                        //                                                                pagedWriter.writeEndElement() // the element
557 1619 mdecorde
                                                        //                                                        }
558 1619 mdecorde
                                                                break;
559 1619 mdecorde
                                                }
560 1619 mdecorde
                                                break;
561 1619 mdecorde
                                        case XMLStreamConstants.CHARACTERS:
562 1619 mdecorde
                                                if (flagform && parser.getText().length() > 0) {
563 1619 mdecorde
                                                        wordvalue+=(parser.getText());
564 1619 mdecorde
                                                        if (flagNote == parser.getText().length() > 0)
565 1619 mdecorde
                                                                noteContent += parser.getText().replace("\n", " ");
566 1619 mdecorde
                                                } else        if (flaginterp && parser.getText().length() > 0) {
567 1619 mdecorde
                                                        interpvalue+=(parser.getText());
568 1619 mdecorde
                                                } else if (flagNote == parser.getText().length() > 0) {
569 321 mdecorde
                                                        noteContent += parser.getText().replace("\n", " ");
570 1619 mdecorde
                                                } else if (writeOutOfTextToEditText) {
571 1619 mdecorde
                                                        pagedWriter.writeCharacters(parser.getText())
572 1619 mdecorde
                                                }
573 1619 mdecorde
                                                break;
574 1619 mdecorde
                                }
575 321 mdecorde
                        }
576 1619 mdecorde
                        closeMultiWriter();
577 1688 mdecorde
                        if (parser != null) parser.close();
578 1688 mdecorde
                        if (inputData != null) inputData.close();
579 1619 mdecorde
                } catch(Exception e) {
580 1619 mdecorde
                        println "** Fail to build $infile edition: $e at "+parser.getLocation()
581 1619 mdecorde
                        println "** resulting file: $outfile"
582 1619 mdecorde
                        println "** Stax stack: "+pagedWriter.getTagStack()
583 1619 mdecorde
                        e.printStackTrace();
584 1619 mdecorde
                        pagedWriter.close()
585 1688 mdecorde
                        if (parser != null) parser.close();
586 1688 mdecorde
                        if (inputData != null) inputData.close();
587 1619 mdecorde
                        return false;
588 321 mdecorde
                }
589 1619 mdecorde
                return true;
590 321 mdecorde
        }
591 321 mdecorde
}