Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / annotation / AnnotationInjectionFilter.groovy @ 499

History | View | Annotate | Download (4.7 kB)

1
package org.txm.macro.annotation
2

    
3
import javax.xml.stream.*
4

    
5
import org.txm.importer.StaxIdentityParser
6
import org.txm.importer.filters.*
7
import org.txm.importer.graal.PersonalNamespaceContext
8

    
9
/**
 * Injects stand-off annotations into an XML-TEI-TXM file.
 *
 * Every XML event is delegated to the {@link StaxIdentityParser} superclass,
 * except inside the {@code <w>} elements whose "id" attribute is a key of
 * {@code records}:
 * <ul>
 * <li>the text of a {@code <form>} is replaced by the record's "word" value
 *     when that value is not empty;</li>
 * <li>the text of an existing {@code <ana type="#prop">} is replaced when
 *     "prop" is one of {@code properties} and the record has a value for it;</li>
 * <li>the record entries that were not consumed that way are appended as new
 *     {@code <ana resp="txm" type="#prop">} elements just before {@code </w>}.</li>
 * </ul>
 *
 * The fields {@code parser}, {@code writer} and {@code localname} used below
 * are not declared in this class; they are presumably inherited from
 * StaxIdentityParser.
 *
 * @author mdecorde
 */
public class AnnotationInjectionFilter extends StaxIdentityParser {

    /** Namespace URI of the {@code <ana>} elements created by this filter. */
    public static String TXMNS = "http://textometrie.org/1.0"

    /** The xml reader factory. */
    private def factory;

    /** The xml-tei-txm file to process. */
    def xmlFile;
    /** Word id -> (property name -> new value); see the sample in {@link #main}. */
    def records;
    /** Names of the properties whose existing {@code <ana>} content may be replaced. */
    def properties;
    /** Number of {@code <w>} elements for which a record was found. */
    int n = 0;
    /** When true, the injected values are printed on the standard output. */
    boolean debug = false

    public AnnotationInjectionFilter() {
        super(null, null, null);
    }

    /**
     * Instantiates a new annotation injection filter.
     *
     * @param xmlFile the xml-tei-txm file to read
     * @param records the annotations to inject, indexed by word id
     * @param properties the names of the properties to replace
     * @param debug enables the trace messages; optional, defaults to false so
     *        that the three-argument call made by {@link #main} resolves
     */
    public AnnotationInjectionFilter(File xmlFile, def records, def properties, def debug = false) {
        super(xmlFile.toURI().toURL()); // init reader and writer

        this.debug = debug
        this.xmlFile = xmlFile
        this.records = records
        this.properties = properties
        this.n = 0;

        // newInstance() only signals failure with FactoryConfigurationError,
        // so there is no XMLStreamException/IOException to catch here
        factory = XMLInputFactory.newInstance();
    }

    /** The record of the current word: the properties to add/replace. */
    def data;
    /** Replacement text of the current {@code <form>}, null when it is kept as is. */
    String newform = null
    /** Value of the "id" attribute of the last {@code <w>} seen. */
    String wordId;
    /** Type of the current {@code <ana>}, without its leading '#'. */
    String anaType;
    /** True while inside a {@code <w>} that has a record. */
    boolean start = false;
    /** True while inside an {@code <ana>} whose content must be replaced. */
    boolean replaceAnnotation = false;

    /**
     * Updates the injection state from the current start tag, then lets the
     * superclass process the element. Nothing is written by this method itself.
     */
    @Override
    protected void processStartElement() {
        switch (localname) {
            case "form":
                if (start) {
                    def f = data.get("word")
                    if (f != null && f.length() > 0) {
                        newform = f
                    }
                }
                break; // a <form> must not run the <w> id lookup below
            case "w":
                for (int i = 0; i < parser.getAttributeCount(); i++) {
                    if (parser.getAttributeLocalName(i) == "id") {
                        wordId = parser.getAttributeValue(i);
                        data = records.get(wordId)
                        if (data != null) { // a null record has nothing to inject
                            n++;
                            start = true; // found a word to update !
                        }
                        break;
                    }
                }
                break;
            case "ana":
                if (start) { // found a word to update
                    for (int i = 0; i < parser.getAttributeCount(); i++) { // look for the "type" attribute
                        if (parser.getAttributeLocalName(i) == "type") {
                            anaType = parser.getAttributeValue(i);
                            if (anaType.startsWith("#")) {
                                anaType = anaType.substring(1)
                            }
                            // replace only when there really is a new value, otherwise
                            // the existing annotation text is kept untouched
                            replaceAnnotation = properties.contains(anaType) && data.get(anaType) != null
                            break;
                        }
                    }
                }
                break;
        }

        super.processStartElement();
    }

    /**
     * Drops the text that is about to be replaced (annotation or form content)
     * and lets the superclass process any other text.
     */
    @Override
    protected void processCharacters() {
        if (start && replaceAnnotation) return; // don't write annotation chars if we are currently replacing
        if (newform != null) return; // don't rewrite form content, the new form is written at </form>

        super.processCharacters();
    }

    /**
     * Writes the injected content that belongs before the current end tag,
     * then lets the superclass process the end tag.
     */
    @Override
    protected void processEndElement() {
        if (start) { // write the new content of the <w> tags
            switch (localname) {
                case "w":
                    start = false
                    if (data.keySet().size() > 0) writer.writeCharacters("\n");
                    // write remaining properties
                    // NOTE(review): a "word" entry is still in data at this point and is
                    // therefore also written as an <ana type="#word"> - confirm this is wanted
                    if (debug && data.size() > 0) println "new ana: " +data
                    for (def prop : data.keySet()) { // (1)
                        def value = data.get(prop)
                        if (value == null) continue // nothing to write for this property

                        writer.writeStartElement(TXMNS, "ana")
                        writer.writeAttribute("resp", "txm")
                        writer.writeAttribute("type", "#$prop")
                        writer.writeCharacters(value)
                        writer.writeEndElement()
                        writer.writeCharacters("\n")
                        if (debug) println " create annotation $prop with "+value
                    }
                    break;
                case "form":
                    if (newform != null) {
                        // the original text was skipped by processCharacters()
                        writer.writeCharacters(newform)
                        newform = null
                    }
                    break;
                case "ana":
                    if (replaceAnnotation) {
                        writer.writeCharacters(data.get(anaType))
                        if (debug) println " replace annotation $anaType content with "+data.get(anaType)
                        data.remove(anaType) // the anaType is removed because it is now written. I don't need to write it here (1)
                    }
                    anaType = ""
                    replaceAnnotation = false;
                    break;
            }
        }
        super.processEndElement();
    }

    /**
     * The main method: runs the filter on a hard-coded sample file.
     *
     * @param args the arguments (not used)
     */
    public static void main(String[] args) {

        File xmlFile = new File("/home/mdecorde/TEMP/annotation/01_DeGaulle.xml")
        def properties = ["p1", "lemma"]
        def records = [
            "w_1":["p1":"a", "lemma":"b"],
            "w_224":["p1":"c", "lemma":"d"],
            "w_400":["p1":"e", "lemma":"f"],
            "w_750":["p1":"g", "lemma":"h"],
            "w_753":["p1":"i", "lemma":"j"],
            "w_756":["p1":"k", "lemma":"l"]
        ]

        def builder = new AnnotationInjectionFilter(xmlFile, records, properties);
        builder.process(new File("/home/mdecorde/TEMP", "rez.xml"));
    }
}