Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / edition / BuildFacsEditions.groovy @ 1688

History | View | Annotate | Download (4.7 kB)

1
package org.txm.macro.edition
2

    
3
import org.txm.scripts.importer.*
4
import org.xml.sax.Attributes
5
import org.txm.importer.scripts.filters.*
6
import java.util.ArrayList
7
import javax.xml.parsers.*
8
import javax.xml.stream.*
9
import java.net.URL
10
import org.xml.sax.InputSource
11
import org.xml.sax.helpers.DefaultHandler
12

    
13
/**
 * Builds a paginated HTML "facsimile" edition from an XML file.
 *
 * The XML file is read with a StAX reader. Each time an element named
 * {@code tag} is met, the value of its {@code attribute} is used as the source
 * of an {@code <img>} written in the current page, and a new HTML page file
 * ({@code <txtname>_<n>.html} in {@code editionDir}) is started either before
 * or after the image depending on {@code cutBefore}.
 *
 * {@link #process()} returns the list of pages as {@code [pageNumber, firstWordId]}
 * pairs.
 */
class BuildFacsEditions {

    private def url
    private def inputData
    private def factory
    private XMLStreamReader parser
    OutputStreamWriter writer
    /** Writer of the page currently being produced; null until the first page is created. */
    StaxStackWriter pagedWriter = null

    File editionDir
    File xmlFile
    File htmlFile
    /** Accumulated [page number, first word id] pairs. */
    def pages = []
    def tag, attribute, txtname, corpusname
    /** True until a {@code <w>} element has been seen in the current page. */
    boolean firstWord
    /** When true the page is cut before the image is written, otherwise after. */
    boolean cutBefore = true
    boolean debug = false

    /** Number of the page currently being written (1-based). */
    int n = 1
    /** Id of the first word of the current page; null before the first page is created. */
    String wordid = null

    /**
     * Opens {@code xmlFile} and prepares the StAX reader used by {@link #process()}.
     *
     * @param xmlFile    the XML source file to read
     * @param editionDir the directory receiving the HTML page files
     * @param corpusname used for the per-corpus stylesheet name and the page title
     * @param txtname    prefix of the generated HTML file names
     * @param tag        local name of the element that triggers a page cut
     * @param attribute  name of the attribute of {@code tag} holding the image path
     * @param debug      when true, progress messages are printed
     */
    public BuildFacsEditions(File xmlFile, File editionDir, String corpusname, String txtname, String tag, String attribute, boolean debug) {
        inputData = xmlFile.toURI().toURL().openStream()
        factory = XMLInputFactory.newInstance()
        parser = factory.createXMLStreamReader(inputData)

        this.xmlFile = xmlFile
        this.editionDir = editionDir
        // the corpus name was previously dropped, leaving the field null
        // ("null.css" stylesheet link and "null Edition" page titles)
        this.corpusname = corpusname
        this.tag = tag
        this.attribute = attribute
        this.txtname = txtname
        this.debug = debug
    }

    /**
     * Closes the current page (if any), registers it in {@code pages} and
     * starts the next HTML page file.
     *
     * The elements that were open below {@code body} in the closed page are
     * re-opened in the new page.
     *
     * @return true if the new page was created, false if an exception occurred
     *         (its localized message is printed)
     */
    private boolean createNextOutput()
    {
        try {
            def tags = closeMultiWriter()
            // drop every saved element up to and including "body": the page
            // skeleton is rewritten below, only the deeper elements are re-opened.
            // If there is no "body" entry, everything is dropped.
            while (!tags.isEmpty()) {
                String dropped = tags.remove(0)
                if ("body" == dropped) {
                    break
                }
            }

            // wordid is null only before the very first page has been created
            if (wordid != null) {
                pages << ["$n", wordid]
                n++
            }

            // next page
            htmlFile = new File(editionDir, "${txtname}_${n}.html")
            firstWord = true
            pagedWriter = new StaxStackWriter(htmlFile, "UTF-8")
            if (debug) println "Create file $htmlFile"
            pagedWriter.writeStartDocument("UTF-8", "1.0")
            pagedWriter.writeStartElement("html")
            // <head> is opened first so that <meta> and <link> are written inside it
            pagedWriter.writeStartElement("head")
            pagedWriter.writeEmptyElement("meta", ["http-equiv":"Content-Type", "content":"text/html","charset":"UTF-8"])
            pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"txm.css"])
            pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"${corpusname}.css"])
            pagedWriter.writeStartElement("title")
            pagedWriter.writeCharacters(corpusname+" Edition - Page "+n)
            pagedWriter.writeEndElement() // </title>
            pagedWriter.writeEndElement() // </head>
            pagedWriter.writeStartElement("body") // <body>

            pagedWriter.writeStartElements(tags)

            wordid = "w_0" // default value if no word is found
            return true
        } catch (Exception e) {
            System.out.println(e.getLocalizedMessage())
            return false
        }
    }

    /**
     * Closes the current page writer, if there is one.
     *
     * When no word was seen in the page, a {@code w_0} anchor span is written
     * before the open elements are closed.
     *
     * @return a copy of the writer's tag stack taken before closing, or an
     *         empty list when no page is open
     */
    private def closeMultiWriter()
    {
        if (pagedWriter == null) {
            return []
        }

        def tags = pagedWriter.getTagStack().clone()

        if (firstWord) { // there was no words
            pagedWriter.writeCharacters("")
            pagedWriter.write("<span id=\"w_0\"/>")
        }
        pagedWriter.writeEndElements()
        pagedWriter.close()
        return tags
    }

    /**
     * Writes a {@code <div>} containing a full-width {@code <img>} in the current page.
     *
     * @param src value of the image {@code src} attribute
     */
    private writeImg(String src) {
        pagedWriter.writeStartElement("div")
        pagedWriter.writeEmptyElement("img", ["src":src, "width":"100%"])
        pagedWriter.writeEndElement() // </div>
    }

    /**
     * Reads the whole XML file and writes the HTML pages.
     *
     * The StAX reader and the input stream are closed when the method exits,
     * including when parsing fails.
     *
     * @return the {@code pages} list of [page number, first word id] pairs
     */
    public def process() {
        try {
            createNextOutput()

            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                if (event != XMLStreamConstants.START_ELEMENT) {
                    continue
                }

                String localname = parser.getLocalName()
                switch (localname) {
                    case "text":
                        // nothing to do; kept so that "text" is matched before the
                        // page-break tag, exactly as before
                        break
                    case "w":
                        if (firstWord) {
                            // remember the id of the first word of the page
                            wordid = parser.getAttributeValue(null, "id")
                            firstWord = false
                        }
                        break
                    case tag:
                        if (debug) println "** TAG $tag $attribute : "+parser.getAttributeValue(null, "id");
                        String imgPath = parser.getAttributeValue(null, attribute)
                        if (imgPath == null) {
                            println "ERROR in $xmlFile no value found for $tag@$attribute at location "+parser.getLocation().getLineNumber()
                        } else if (cutBefore) {
                            if (debug) println " cut before"
                            createNextOutput()
                            if (debug) println " write img $imgPath"
                            writeImg(imgPath)
                        } else {
                            if (debug) println " write img $imgPath"
                            writeImg(imgPath)
                            if (debug) println " cut after"
                            createNextOutput()
                        }
                        break
                }
            }
        } finally {
            // release the input even when parsing fails
            if (parser != null) parser.close()
            if (inputData != null) inputData.close()
        }

        closeMultiWriter()
        pages << ["$n", wordid] // add the last page (no page break encountered after it)
        return pages
    }
}