Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / teicmc / SplitTEICMRPerLocutor.groovy @ 1000

History | View | Annotate | Download (5.4 kB)

1
package org.txm.scripts.importer.teicmc
2

    
3
import java.io.File;
4
import java.net.URL;
5

    
6
import javax.xml.stream.*;
7

    
8
import org.txm.importer.StaxIdentityParser;
9
import org.txm.scripts.importer.StaxStackWriter;
10

    
11
/**
 * Splits one XML input file into several output files, one per locutor.
 *
 * While the input is parsed, every event is copied to the output:
 * <ul>
 * <li>events located inside a "posting" element whose "who" attribute matches
 * a registered locutor are written to that locutor's file only;</li>
 * <li>every other event is written to ALL locutor files.</li>
 * </ul>
 *
 * The members {@code parser}, {@code writer}, {@code localname} and
 * {@code Nscontext} are not declared in this class; they are presumably
 * inherited from StaxIdentityParser (TODO confirm against the superclass).
 *
 * NOTE(review): a "posting" whose "who" value is not a registered locutor, or
 * that has no "who" attribute at all, leaves {@code writer} null and is
 * therefore copied to every output file. Confirm this is the intended result.
 */
public class SplitTEICMRPerLocutor extends StaxIdentityParser {

    /** Directory receiving one "&lt;locutor&gt;-teicmr.xml" file per locutor. */
    File outputDirectory;

    /** Output writers, indexed by locutor identifier (value of the "who" attribute). */
    def writers = [:];

    /**
     * @param inputFile the XML file to split
     * @param outputDirectory the directory where the per-locutor files are created
     */
    public SplitTEICMRPerLocutor(File inputFile, File outputDirectory) {
        super(inputFile);
        this.outputDirectory = outputDirectory;
        // mkdirs() also creates the missing parent directories, mkdir() silently failed in that case
        outputDirectory.mkdirs()
    }

    /**
     * @return the writers that must receive the current event: the writer of
     * the current locutor if one is selected, all the writers otherwise
     */
    private def currentTargets() {
        return (writer != null) ? [writer] : writers.values();
    }

    /**
     * Selects the writer of the locutor when a "posting" element starts, then
     * writes the start tag to the current target(s).
     */
    protected void processStartElement() {
        if (parser.getLocalName() == "posting") {
            for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                if (parser.getAttributeLocalName(i) == "who") {
                    String loc = parser.getAttributeValue(i)
                    writer = writers.get(loc) // switch writer on locutor, null if the locutor is not registered
                    break;
                }
            }
        }

        for (def swriter : currentTargets()) processStartElement(swriter);
    }

    /**
     * Writes the current start element (name, namespace declarations and
     * attributes) with the given writer.
     *
     * @param swriter the writer to use
     */
    protected void processStartElement(def swriter)
    {
        String prefix = parser.getPrefix();

        if (prefix != null && prefix.length() > 0)
            swriter.writeStartElement(Nscontext.getNamespaceURI(prefix), localname)
        else
            swriter.writeStartElement(localname);

        for (int i = 0 ; i < parser.getNamespaceCount() ; i++) {
            swriter.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
        }

        writeAttributes(swriter);
    }

    /**
     * Former single-writer copy of processStartElement(def); kept only for
     * compatibility and now delegates to it with the current writer.
     */
    private void _processStartElement() {
        processStartElement(writer);
    }

    /** Writes the current namespace declaration to the current target(s). */
    protected void processNamespace() {
        for (def swriter : currentTargets()) swriter.writeNamespace(parser.getPrefix(), parser.getNamespaceURI());
    }

    /** Writes the attributes of the current element to the current target(s). */
    protected void writeAttributes() {
        for (def swriter : currentTargets()) writeAttributes(swriter);
    }

    /**
     * Writes the attributes of the current element with the given writer.
     * A prefixed attribute is written under the name "prefix:localName".
     *
     * @param swriter the writer to use
     */
    protected void writeAttributes(def swriter) {
        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
            String attrPrefix = parser.getAttributePrefix(i);
            if (attrPrefix != null && attrPrefix.length() > 0)
                swriter.writeAttribute(attrPrefix+":"+parser.getAttributeLocalName(i), parser.getAttributeValue(i));
            else
                swriter.writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i));
        }
    }

    /** Writes the current text to the current target(s). */
    protected void processCharacters()
    {
        for (def swriter : currentTargets()) swriter.writeCharacters(parser.getText());
    }

    /** Writes the current processing instruction to the current target(s). */
    protected void processProcessingInstruction()
    {
        for (def swriter : currentTargets()) swriter.writeProcessingInstruction(parser.getPITarget(), parser.getPIData());
    }

    /** Writes the current DTD to the current target(s). */
    protected void processDTD()
    {
        for (def swriter : currentTargets()) swriter.writeDTD(parser.getText());
    }

    /** Writes the current CDATA section to the current target(s). */
    protected void processCDATA()
    {
        for (def swriter : currentTargets()) swriter.writeCData(parser.getText())
    }

    /** Writes the current comment to the current target(s). */
    protected void processComment()
    {
        for (def swriter : currentTargets()) swriter.writeComment(parser.getText());
    }

    /**
     * Writes the end tag to the current target(s), then unselects the locutor
     * writer when the closed element is a "posting".
     */
    protected void processEndElement()
    {
        for (def swriter : currentTargets()) swriter.writeEndElement();

        // leaving the posting: the following events are written to all the writers again
        if (parser.getLocalName() == "posting") writer = null;
    }

    /** Writes the end of the document to the current target(s). */
    protected void processEndDocument() {
        for (def swriter : currentTargets()) swriter.writeEndDocument();
    }

    /** Writes the current entity reference to the current target(s). */
    protected void processEntityReference() {
        for (def swriter : currentTargets()) swriter.writeEntityRef(parser.getLocalName());
    }

    /**
     * Creates one "&lt;locutor&gt;-teicmr.xml" file per locutor in the output
     * directory, then parses the input file and dispatches its content.
     *
     * The writers and the parser are closed when the method exits, whether the
     * processing succeeded or raised an exception.
     *
     * @param locutors the locutor identifiers, as found in the "who" attribute of the "posting" elements
     * @return the value returned by process(...)
     */
    public boolean processLocutors(def locutors) {
        try {
            for (String loc : locutors) {
                println "Create writer for $loc"
                writers.put(loc, new StaxStackWriter(new File(outputDirectory, loc+"-teicmr.xml"), "UTF8"))
            }
            println "writers: $writers"

            for (def swriter : writers.values()) {
                swriter.writeStartDocument("UTF-8", "1.0");
                swriter.writeCharacters("\n");
            }

            return process(writer);
        } finally {
            // always release the output files, even if the processing failed
            for (def swriter : writers.values()) {
                try {swriter.close();} catch(Exception e){println "close writer exep: "+e}
            }

            if (parser != null)
                try {parser.close()} catch(Exception e){println "parser exep: "+e}
        }
    }

    /** Developer entry point: runs the split with hard-coded test paths and locutors. */
    public static void main(String[] args) {
        File inputFile = new File("/home/mdecorde/xml/comere/ismael-textchat.xml")
        File outputDirectory = new File("/home/mdecorde/xml/comere/split_out")

        def p = new SplitTEICMRPerLocutor(inputFile, outputDirectory)
        println p.processLocutors(["s_1", "s_10", "s_11", "s_12", "s_13", "s_14", "s_3", "s_4", "s_5", "s_6", "s_7", "s_8", "s_9", "tt_1", "tt_12", "tt_2", "tt_3", "tt_4", "tt_5", "tt_6", "tt_7", "tt_8"])
    }
}