Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / edition / BuildFacsEditions.groovy @ 1688

History | View | Annotate | Download (4.7 kB)

1 321 mdecorde
package org.txm.macro.edition
2 321 mdecorde
3 986 mdecorde
import org.txm.scripts.importer.*
4 321 mdecorde
import org.xml.sax.Attributes
5 1000 mdecorde
import org.txm.importer.scripts.filters.*
6 321 mdecorde
import java.util.ArrayList
7 321 mdecorde
import javax.xml.parsers.*
8 321 mdecorde
import javax.xml.stream.*
9 321 mdecorde
import java.net.URL
10 321 mdecorde
import org.xml.sax.InputSource
11 321 mdecorde
import org.xml.sax.helpers.DefaultHandler
12 321 mdecorde
13 321 mdecorde
/**
 * Builds a paginated HTML "facsimile" edition from one XML file.
 *
 * The XML file is read with a StAX stream reader. Each time a start element
 * named {@code tag} carrying the attribute {@code attribute} is met, an
 * {@code <img>} whose {@code src} is the attribute value is written and a
 * new HTML page file ({@code <txtname>_<n>.html} in {@code editionDir}) is
 * started, either before or after the image depending on {@code cutBefore}.
 *
 * {@link #process()} returns the list of pages as {@code [pageNumber, wordId]}
 * pairs, where {@code wordId} is the {@code id} attribute of the first
 * {@code <w>} element met on that page, or {@code "w_0"} when the page
 * contains no word.
 */
class BuildFacsEditions {

    // NOTE(review): 'url' and 'writer' are never assigned or read in this class;
    // kept because external scripts may access them.
    private def url
    private def inputData
    private def factory
    private XMLStreamReader parser
    OutputStreamWriter writer
    StaxStackWriter pagedWriter = null

    File editionDir
    File xmlFile
    File htmlFile

    /** Accumulated [pageNumber, firstWordId] pairs, returned by process(). */
    def pages = []
    def tag, attribute, txtname, corpusname

    /** True while no {@code <w>} element has been met on the current page. */
    boolean firstWord

    /** When true the page is cut before the image is written, otherwise after. */
    boolean cutBefore = true;
    boolean debug = false;

    /** Number of the page currently being written (1-based). */
    int n = 1;

    /** Id of the first word of the current page; null until the first page is opened. */
    String wordid = null;

    /**
     * Opens the XML file and prepares the StAX reader.
     *
     * @param xmlFile    the XML file to read
     * @param editionDir directory receiving the generated HTML pages
     * @param corpusname used for the per-corpus stylesheet link and the page title
     * @param txtname    prefix of the generated HTML file names
     * @param tag        local name of the element that triggers a page cut
     * @param attribute  attribute of {@code tag} holding the image path
     * @param debug      when true, progress messages are printed
     */
    public BuildFacsEditions(File xmlFile, File editionDir, String corpusname, String txtname, String tag, String attribute, boolean debug) {
        inputData = xmlFile.toURI().toURL().openStream()
        try {
            factory = XMLInputFactory.newInstance()
            parser = factory.createXMLStreamReader(inputData)
        } catch (Exception e) {
            // do not leak the stream when the reader cannot be created
            inputData.close()
            throw e
        }

        this.xmlFile = xmlFile
        this.editionDir = editionDir
        // fix: the corpus name was never stored, producing "null.css" and "null Edition" in the pages
        this.corpusname = corpusname
        this.tag = tag
        this.attribute = attribute
        this.txtname = txtname
        this.debug = debug
    }

    /**
     * Closes the current page (if any) and opens the next HTML page file.
     *
     * The elements that were still open below {@code <body>} in the previous
     * page are re-opened in the new page. When a page was already open, its
     * [pageNumber, firstWordId] pair is appended to {@code pages} and the
     * page counter is incremented.
     *
     * @return true on success, false if any exception occurred (its message is printed)
     */
    private boolean createNextOutput()
    {
        try {
            def tags = closeMultiWriter();

            // drop everything up to and including "body": only the elements
            // opened inside the body must be re-opened in the next page
            while (!tags.isEmpty()) {
                String removed = tags.remove(0)
                if ("body" == removed) {
                    break;
                }
            }

            if (wordid != null) { // null only before the very first page is created
                pages << ["$n", wordid]
                n++
            }

            // next page
            htmlFile = new File(editionDir, "${txtname}_${n}.html")
            firstWord = true
            pagedWriter = new StaxStackWriter(htmlFile, "UTF-8");
            if (debug) println "Create file $htmlFile"
            pagedWriter.writeStartDocument("UTF-8", "1.0")
            pagedWriter.writeStartElement("html");
            // fix: <meta> and <link> are metadata and must be children of <head>,
            // they were previously written before <head> was opened
            pagedWriter.writeStartElement("head");
            pagedWriter.writeEmptyElement("meta", ["http-equiv":"Content-Type", "content":"text/html","charset":"UTF-8"]);
            pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"txm.css"]);
            pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"${corpusname}.css"]);
            pagedWriter.writeStartElement("title")
            pagedWriter.writeCharacters(corpusname+" Edition - Page "+n)
            pagedWriter.writeEndElement(); // </title>
            pagedWriter.writeEndElement() // </head>
            pagedWriter.writeStartElement("body") // <body>

            pagedWriter.writeStartElements(tags);

            wordid = "w_0"; // default value if no word is found
            return true;
        } catch (Exception e) {
            System.out.println(e.getLocalizedMessage());
            if (debug) e.printStackTrace()
            return false;
        }
    }

    /**
     * Terminates the current page file, if one is open.
     *
     * When no word was met on the page, a {@code <span id="w_0"/>} anchor is
     * written so that the default word id "w_0" exists in the page.
     *
     * @return a copy of the writer's open-element stack taken before closing,
     *         or an empty list when no page was open
     */
    private def closeMultiWriter()
    {
        if (pagedWriter == null) {
            return [];
        }

        def tags = pagedWriter.getTagStack().clone();

        if (firstWord) { // there was no words
            // NOTE(review): the empty writeCharacters presumably forces the pending
            // start tag to be completed before the raw write - confirm in StaxStackWriter
            pagedWriter.writeCharacters("");
            pagedWriter.write("<span id=\"w_0\"/>");
        }
        pagedWriter.writeEndElements();
        pagedWriter.close();
        return tags;
    }

    /**
     * Writes {@code <div><img src="..." width="100%"/></div>} in the current page.
     *
     * @param src value of the image {@code src} attribute
     */
    private writeImg(String src) {
        pagedWriter.writeStartElement("div");
        pagedWriter.writeEmptyElement("img", ["src":src, "width":"100%"]);
        pagedWriter.writeEndElement(); // </div>
    }

    /**
     * Handles the page-cut element the parser is currently positioned on:
     * writes its image and starts a new page, in the order given by
     * {@code cutBefore}. An element without the image attribute is reported
     * and otherwise ignored.
     */
    private void handlePageCut() {
        if (debug) println "** TAG $tag $attribute : "+parser.getAttributeValue(null, "id");
        String imgPath = parser.getAttributeValue(null, attribute);
        if (imgPath == null) {
            println "ERROR in $xmlFile no value found for $tag@$attribute at location "+parser.getLocation().getLineNumber()
            return
        }

        if (cutBefore) {
            if (debug) println " cut before"
            createNextOutput()
            if (debug) println " write img $imgPath"
            writeImg(imgPath)
        } else {
            if (debug) println " write img $imgPath"
            writeImg(imgPath)
            if (debug) println " cut after"
            createNextOutput()
        }
    }

    /**
     * Reads the whole XML file and writes the HTML pages.
     *
     * The reader and the input stream are closed when this method ends,
     * whether parsing succeeded or not.
     *
     * @return the list of [pageNumber, firstWordId] pairs, one per written page
     */
    public def process() {

        String localname

        createNextOutput();

        try {
            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                if (event != XMLStreamConstants.START_ELEMENT) {
                    continue
                }

                localname = parser.getLocalName();
                // case order matters: "text" and "w" win over a 'tag' of the same name
                switch (localname) {
                    case "text":
                        break;
                    case "w":
                        if (firstWord) { // remember the first word of the page only
                            wordid = parser.getAttributeValue(null, "id");
                            firstWord = false;
                        }
                        break;
                    case tag:
                        handlePageCut()
                        break;
                }
            }
        } finally {
            // fix: release the reader and the stream even when parsing fails;
            // XMLStreamReader.close() does not close the underlying stream
            if (parser != null) parser.close();
            if (inputData != null) inputData.close();
        }

        closeMultiWriter()
        pages << ["$n", wordid] // add the last page (no page cut encountered after it)
        return pages
    }
}