Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / edition / BuildXTZEditions.groovy @ 1688

History | View | Annotate | Download (3.7 kB)

1
package org.txm.macro.edition
2

    
3
import org.txm.scripts.importer.*
4
import org.xml.sax.Attributes
5
import org.txm.importer.scripts.filters.*
6
import java.util.ArrayList
7
import javax.xml.parsers.*
8
import javax.xml.stream.*
9
import java.net.URL
10
import org.xml.sax.InputSource
11
import org.xml.sax.helpers.DefaultHandler
12

    
13
/**
 * Splits one XML file into a sequence of HTML page files.
 *
 * The source is read with a StAX {@code XMLStreamReader}. A first HTML file
 * is opened before parsing starts, and every {@code pb} start element closes
 * the current file and opens the next one. Files are named
 * {@code <txtname>_<n>.html} and are created in {@code editionDir}.
 *
 * {@link #process()} returns the {@code pages} list, whose entries are
 * two-element lists {@code [page number, word id]}.
 *
 * NOTE(review): in the code below only page breaks drive the pagination and
 * only page boundaries are written; word content is not copied to the output
 * and {@code wordsPerPage} is stored but never read -- confirm whether this
 * is intentional.
 */
class BuildXTZEditions {

    private def url
    private def inputData
    private def factory
    private XMLStreamReader parser
    OutputStreamWriter writer
    StaxStackWriter pagedWriter = null

    File editionDir
    File xmlFile
    File htmlFile
    /** Accumulated [page number, word id] pairs, returned by process(). */
    def pages = []
    def txtname, corpusname
    int wordsPerPage = 500
    /** True until a "w" element has been seen on the current page. */
    boolean firstWord
    /** Selects the order of "record page" and "open next file" on a "pb". */
    boolean cutBefore = true;

    /** Number of the current output page; incremented by createNextOutput(). */
    int n = 0;

    /**
     * No-argument constructor, kept so that code relying on the implicit
     * default constructor (followed by a call to the legacy
     * {@code BuildFacsEditions(...)} initializer) keeps working.
     */
    public BuildXTZEditions() {
    }

    /**
     * Opens the XML source and stores the edition settings.
     *
     * @param xmlFile      XML file to read
     * @param editionDir   directory receiving the generated HTML files
     * @param corpusname   used in the page title and in the name of the
     *                     second linked stylesheet
     * @param txtname      prefix of the generated HTML file names
     * @param wordsPerPage stored in the {@code wordsPerPage} property
     */
    public BuildXTZEditions(File xmlFile, File editionDir, String corpusname, String txtname, int wordsPerPage) {
        init(xmlFile, editionDir, corpusname, txtname, wordsPerPage)
    }

    /**
     * Legacy initializer. It used to be written as if it were the
     * constructor, but its name does not match the class, so it is an
     * ordinary method. Kept as a delegate for existing callers.
     *
     * @deprecated use the {@code BuildXTZEditions(File, File, String, String, int)} constructor
     */
    @Deprecated
    public def BuildFacsEditions(File xmlFile, File editionDir, String corpusname, String txtname, int wordsPerPage) {
        init(xmlFile, editionDir, corpusname, txtname, wordsPerPage)
    }

    /** Shared initialization: opens the StAX reader and stores the settings. */
    private void init(File xmlFile, File editionDir, String corpusname, String txtname, int wordsPerPage) {
        inputData = xmlFile.toURI().toURL().openStream()
        try {
            factory = XMLInputFactory.newInstance()
            parser = factory.createXMLStreamReader(inputData)
        } catch (Exception e) {
            // do not leak the stream when the reader cannot be created
            inputData.close()
            throw e
        }

        this.xmlFile = xmlFile
        this.editionDir = editionDir
        this.corpusname = corpusname // was never stored: title and CSS link rendered "null"
        this.txtname = txtname
        this.wordsPerPage = wordsPerPage
    }

    /**
     * Closes the current page file (if any) and opens the next one.
     *
     * Writes the HTML prologue, then re-opens, inside the new body, the
     * elements that were still open below body when the previous file was
     * closed.
     *
     * @return true when the new file is ready, false when an exception
     *         occurred (the exception is printed to standard output)
     */
    private boolean createNextOutput()
    {
        try {
            def tags = closeMultiWriter();

            // Drop the leading entries up to and including "body": those
            // elements are written again by the prologue below. When there
            // is no "body" entry the whole list is emptied.
            while (!tags.isEmpty()) {
                String tag = tags.remove(0)
                if ("body" == tag) {
                    break
                }
            }

            n++
            htmlFile = new File(editionDir, "${txtname}_${n}.html")
            firstWord = true

            pagedWriter = new StaxStackWriter(htmlFile, "UTF-8");

            pagedWriter.writeStartDocument("UTF-8", "1.0")
            pagedWriter.writeStartElement("html");
            // meta and link are metadata content: they belong inside <head>
            pagedWriter.writeStartElement("head");
            pagedWriter.writeEmptyElement("meta", ["http-equiv":"Content-Type", "content":"text/html","charset":"UTF-8"]);
            pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"txm.css"]);
            pagedWriter.writeEmptyElement("link", ["rel":"stylesheet", "type":"text/css","href":"${corpusname}.css"]);
            pagedWriter.writeStartElement("title")
            pagedWriter.writeCharacters(corpusname+" Edition - Page "+n)
            pagedWriter.writeEndElement(); // </title>
            pagedWriter.writeEndElement() // </head>
            pagedWriter.writeStartElement("body") //<body>

            pagedWriter.writeStartElements(tags);
            return true;
        } catch (Exception ee) {
            System.out.println(ee);
            return false;
        }
    }

    /**
     * Finishes and closes the current page file.
     *
     * When no "w" element was seen on the page, a placeholder
     * {@code <span id="w_0"/>} is written and {@code [n, "w_0"]} is appended
     * to {@code pages}.
     *
     * @return a copy of the writer's tag stack taken before the open
     *         elements are closed, or an empty list when no file is open
     */
    private def closeMultiWriter()
    {
        if (pagedWriter == null) {
            return [];
        }

        def current = pagedWriter
        // forget the writer first: once closed it must never be reused
        pagedWriter = null

        def tags = current.getTagStack().clone();
        try {
            if (firstWord) { // there was no words
                // presumably forces the pending start tag to be completed
                // before the raw markup is written -- confirm against
                // StaxStackWriter
                current.writeCharacters("");
                current.write("<span id=\"w_0\"/>");
                // NOTE(review): when called from process() on a "pb" with
                // cutBefore set, page n has already been recorded, so this
                // adds a second entry for the same page -- confirm intent.
                pages << ["$n", "w_0"]
            }
            current.writeEndElements();
        } finally {
            current.close();
        }
        return tags;
    }

    /** Writes {@code <div><img src="..."/></div>} to the current page file. */
    private writeImg(String src) {
        pagedWriter.writeStartElement("div");
        pagedWriter.writeEmptyElement("img", ["src":src]);
        pagedWriter.writeEndElement(); // </div>
    }

    /**
     * Parses the XML source and creates one HTML file per page.
     *
     * Processing stops early when a page file cannot be created. The parser
     * and the input stream are closed in every case.
     *
     * @return the {@code pages} list of [page number, word id] entries
     */
    public def process() {
        // NOTE(review): wordid is not reset when a new page starts, so a page
        // without words is recorded with the id found on an earlier page;
        // the last page is only recorded when it has no words -- confirm.
        String wordid = "w_0"
        String localname

        try {
            boolean outputReady = createNextOutput();
            while (outputReady) {
                int event = parser.next()
                if (event == XMLStreamConstants.END_DOCUMENT) {
                    break
                }
                if (event != XMLStreamConstants.START_ELEMENT) {
                    continue
                }

                localname = parser.getLocalName();
                switch (localname) {
                    case "w":
                        if (firstWord) {
                            wordid = parser.getAttributeValue(null, "id");
                            firstWord = false;
                        }
                        break;
                    case "pb":
                        if (cutBefore) {
                            pages << ["$n", wordid]
                            // WRITE PB
                            outputReady = createNextOutput()
                        } else {
                            outputReady = createNextOutput()
                            // do not record a page whose file could not be created
                            if (outputReady) {
                                pages << ["$n", wordid]
                            }
                            // WRITE PB
                        }
                        break;
                }
            }
            closeMultiWriter()
        } finally {
            // closing the stream reader does not close the underlying
            // stream, so both are released explicitly
            try {
                if (parser != null) parser.close();
            } finally {
                if (inputData != null) inputData.close();
            }
        }
        return pages
    }
}