Statistics
| Revision:

ccc / projets / CMC2ELAN / src / SplitTEICMRPerLocutor.groovy @ 2

History | View | Annotate | Download (5.2 kB)

1

    
2

    
3
import java.io.File;
4
import java.net.URL;
5

    
6
import javax.xml.stream.*;
7

    
8

    
9
/**
 * Splits one TEI-CMR XML document into one output file per locutor.
 *
 * Every parser event is copied to the output. Events that occur inside a
 * "posting" element are written only to the writer registered for the value of
 * that element's "who" attribute; all other events are written to every
 * registered writer.
 *
 * The names parser, writer, localname, Nscontext and process(...) are not
 * declared in this class; they are presumably inherited from StaxIdentityParser
 * (not visible here) -- TODO confirm.
 */
public class SplitTEICMRPerLocutor extends StaxIdentityParser {

    /** Directory receiving the "<locutor>-teicmr.xml" files. */
    File outputDirectory;

    /** Locutor id -> output writer, filled by processLocutors(). */
    def writers = [:];

    /**
     * @param inputFile the XML file to split (handed to the super constructor)
     * @param outputDirectory directory for the per-locutor files; it is created,
     *        together with any missing parent directories, if it does not exist
     */
    public SplitTEICMRPerLocutor(File inputFile, File outputDirectory) {
        super(inputFile);
        this.outputDirectory = outputDirectory;
        // mkdirs() rather than mkdir(): mkdir() fails silently when a parent
        // directory is missing, and the later newWriter() call would then throw.
        outputDirectory.mkdirs()
    }

    /**
     * Returns the writers the current event must be copied to: the single
     * locutor writer while one is selected, every registered writer otherwise.
     */
    protected Collection activeWriters() {
        if (writer != null) {
            return [writer]
        }
        return writers.values()
    }

    /**
     * Handles a start tag. A "posting" element selects the writer of its "who"
     * attribute before the tag is written.
     */
    protected void processStartElement() {
        if (parser.getLocalName() == "posting") {
            for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                if (parser.getAttributeLocalName(i) == "who") {
                    String loc = parser.getAttributeValue(i)
                    // NOTE(review): when loc has no registered writer, get()
                    // returns null and the posting is written to ALL outputs.
                    writer = writers.get(loc) // switch writer on locutor
                    break;
                }
            }
        }

        for (def swriter : activeWriters()) {
            processStartElement(swriter);
        }
    }

    /**
     * Writes the current start tag, its namespace declarations and its
     * attributes to the given writer.
     *
     * @param swriter the writer receiving the element
     */
    protected void processStartElement(def swriter) {
        String prefix = parser.getPrefix();

        if (prefix != null && prefix.length() > 0) {
            swriter.writeStartElement(Nscontext.getNamespaceURI(prefix), localname)
        } else {
            swriter.writeStartElement(localname);
        }

        for (int i = 0 ; i < parser.getNamespaceCount() ; i++) {
            swriter.writeNamespace(parser.getNamespacePrefix(i), parser.getNamespaceURI(i));
        }

        writeAttributes(swriter);
    }

    /**
     * Writes the current start tag to the currently selected writer only.
     * Not called from within this class; kept so existing callers, if any,
     * keep working. Requires a non-null writer.
     */
    private void _processStartElement() {
        processStartElement(writer);
    }

    /** Copies the current namespace declaration to the active writer(s). */
    protected void processNamespace() {
        for (def swriter : activeWriters()) {
            swriter.writeNamespace(parser.getPrefix(), parser.getNamespaceURI());
        }
    }

    /** Copies the attributes of the current element to the active writer(s). */
    protected void writeAttributes() {
        for (def swriter : activeWriters()) {
            writeAttributes(swriter);
        }
    }

    /**
     * Copies the attributes of the current element to the given writer.
     * A prefixed attribute is written under the qualified name "prefix:local".
     *
     * @param swriter the writer receiving the attributes
     */
    protected void writeAttributes(def swriter) {
        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
            String attrPrefix = parser.getAttributePrefix(i);
            String attrName = parser.getAttributeLocalName(i);
            if (attrPrefix != null && attrPrefix.length() > 0) {
                attrName = attrPrefix + ":" + attrName;
            }
            swriter.writeAttribute(attrName, parser.getAttributeValue(i));
        }
    }

    /** Copies the current text node to the active writer(s). */
    protected void processCharacters() {
        String text = parser.getText();
        for (def swriter : activeWriters()) {
            swriter.writeCharacters(text);
        }
    }

    /** Copies the current processing instruction to the active writer(s). */
    protected void processProcessingInstruction() {
        for (def swriter : activeWriters()) {
            swriter.writeProcessingInstruction(parser.getPITarget(), parser.getPIData());
        }
    }

    /** Copies the DTD declaration to the active writer(s). */
    protected void processDTD() {
        String text = parser.getText();
        for (def swriter : activeWriters()) {
            swriter.writeDTD(text);
        }
    }

    /** Copies the current CDATA section to the active writer(s). */
    protected void processCDATA() {
        String text = parser.getText();
        for (def swriter : activeWriters()) {
            swriter.writeCData(text)
        }
    }

    /** Copies the current comment to the active writer(s). */
    protected void processComment() {
        String text = parser.getText();
        for (def swriter : activeWriters()) {
            swriter.writeComment(text);
        }
    }

    /**
     * Writes the end tag to the active writer(s). Once the end tag of a
     * "posting" has been written, the locutor selection is dropped so that
     * following events go to every writer again.
     */
    protected void processEndElement() {
        for (def swriter : activeWriters()) {
            swriter.writeEndElement();
        }
        if (parser.getLocalName() == "posting") {
            writer = null;
        }
    }

    /** Ends the document on the active writer(s). */
    protected void processEndDocument() {
        for (def swriter : activeWriters()) {
            swriter.writeEndDocument();
        }
    }

    /** Copies the current entity reference to the active writer(s). */
    protected void processEntityReference() {
        for (def swriter : activeWriters()) {
            swriter.writeEntityRef(parser.getLocalName());
        }
    }

    /**
     * Creates one "<locutor>-teicmr.xml" file per locutor in the output
     * directory, runs the parser, then closes every writer and the parser.
     * Writers and parser are closed even when processing throws, so no output
     * file is left open.
     *
     * @param locutors the locutor ids to create an output file for
     * @return the value returned by process(...)
     */
    public boolean processLocutors(def locutors) {
        try {
            for (String loc : locutors) {
                println "Create writer for $loc"
                writers.put(loc, new StaxStackWriter(new File(outputDirectory, loc+"-teicmr.xml").newWriter("UTF-8")))
            }
            println "writers: $writers"

            for (def swriter : writers.values()) {
                swriter.writeStartDocument("UTF-8", "1.0");
                swriter.writeCharacters("\n");
            }

            return process(writer);
        } finally {
            // Best-effort cleanup: a failing close must not hide the result
            // (or the original exception) of the processing above.
            for (def swriter : writers.values()) {
                try {swriter.close();} catch(Exception e){println "close writer exep: "+e}
            }

            if (parser != null) {
                try {parser.close()} catch(Exception e){println "parser exep: "+e}
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * Usage: [inputFile [outputDirectory [locutor ...]]]
     * Every omitted argument falls back to the built-in default below.
     */
    public static void main(String[] args) {
        File inputFile = new File(args.length > 0 ? args[0] : "/home/mdecorde/xml/comere/ismael-textchat.xml")
        File outputDirectory = new File(args.length > 1 ? args[1] : "/home/mdecorde/xml/comere/split_out")
        def locutors = args.length > 2 ? args[2..-1] : ["s_1", "s_10", "s_11", "s_12", "s_13", "s_14", "s_3", "s_4", "s_5", "s_6", "s_7", "s_8", "s_9", "tt_1", "tt_12", "tt_2", "tt_3", "tt_4", "tt_5", "tt_6", "tt_7", "tt_8"]

        def p = new SplitTEICMRPerLocutor(inputFile, outputDirectory)
        println p.processLocutors(locutors)
    }
}