Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / annotation / AnnotationInjectionFilter.groovy @ 499

History | View | Annotate | Download (4.7 kB)

1 321 mdecorde
package org.txm.macro.annotation
2 321 mdecorde
3 321 mdecorde
import javax.xml.stream.*
4 321 mdecorde
5 321 mdecorde
import org.txm.importer.StaxIdentityParser
6 321 mdecorde
import org.txm.importer.filters.*
7 479 mdecorde
import org.txm.importer.graal.PersonalNamespaceContext
8 321 mdecorde
9 321 mdecorde
/**
 * StAX filter that copies an XML-TEI-TXM file while injecting annotation
 * values into its {@code <w>} elements.
 *
 * For each {@code <w>} whose {@code id} attribute is a key of {@code records}:
 * <ul>
 * <li>the content of its {@code <form>} child is replaced by the record's
 * {@code "word"} value, when that value is non-empty;</li>
 * <li>the content of each {@code <ana>} child whose type (without the leading
 * {@code #}) is listed in {@code properties} and present in the record is
 * replaced by the record's value;</li>
 * <li>every record entry that was not consumed by one of the two steps above
 * is appended as a new {@code <ana resp="txm" type="#name">} element just
 * before the closing {@code </w>}.</li>
 * </ul>
 *
 * NOTE: consumed entries are removed from the per-word maps held by
 * {@code records}, so the caller's maps are modified while processing.
 *
 * @author mdecorde
 */
public class AnnotationInjectionFilter extends StaxIdentityParser {

	/** Namespace URI used for the {@code <ana>} elements created by this filter. */
	public static String TXMNS = "http://textometrie.org/1.0"

	/** The xml reader factory. */
	private def factory;

	/** The XML-TEI-TXM file being read. */
	def xmlFile;
	/** Word id -> map of (property name -> new value). */
	def records;
	/** Names of the properties whose existing {@code <ana>} content may be replaced. */
	def properties;
	/** Number of {@code <w>} elements whose id was found in {@code records}. */
	int n = 0;
	/** When true, trace the replaced/created annotations on stdout. */
	boolean debug = false

	public AnnotationInjectionFilter() {
		super(null, null, null);
	}

	/**
	 * Instantiates a new annotation injection filter.
	 *
	 * @param xmlFile the xml-tei-txm file to read
	 * @param records the values to inject: word id -> (property name -> value)
	 * @param properties the names of the properties to replace in existing {@code <ana>} elements
	 * @param debug if true, trace the injected annotations (defaults to false)
	 */
	public AnnotationInjectionFilter(File xmlFile, def records, def properties, def debug = false) {
		super(xmlFile.toURI().toURL()); // init reader and writer

		this.debug = debug
		this.xmlFile = xmlFile
		this.records = records
		this.properties = properties
		this.n = 0;

		// XMLInputFactory.newInstance() throws neither XMLStreamException nor
		// IOException, so no catch clause is needed here.
		factory = XMLInputFactory.newInstance();
	}

	/** The properties to add/replace for the current word. */
	def data;
	/** Replacement text of the current {@code <form>}; null when the form is left untouched. */
	String newform = null
	/** Id of the last {@code <w>} element read. */
	String wordId;
	/** Type (without the leading '#') of the {@code <ana>} element being read. */
	String anaType;
	/** True while inside a {@code <w>} element that has a record. */
	boolean start = false;
	/** True while inside an {@code <ana>} element whose content is being replaced. */
	boolean replaceAnnotation = false;

	/**
	 * Updates the filter state for the element being opened, then lets the
	 * parent class write the start tag. This method itself writes nothing.
	 */
	@Override
	protected void processStartElement() {
		switch (localname) {
			case "form":
				if (start) {
					def f = data.get("word")
					if (f != null && f.length() > 0) {
						newform = f
					}
				}
				break; // do not fall through to the "w" id lookup
			case "w":
				for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
					if (parser.getAttributeLocalName(i) == "id") {
						wordId = parser.getAttributeValue(i);
						data = records.get(wordId)
						if (data != null) { // a key mapped to null has nothing to inject
							n++;
							start = true; // found a word to update !
						}
						break;
					}
				}
				break;
			case "ana":
				if (start) { // found a word to update
					for (int i = 0 ; i < parser.getAttributeCount() ; i++) { // look for the "type" attribute
						if (parser.getAttributeLocalName(i) == "type") {
							anaType = parser.getAttributeValue(i);
							// drop the leading '#' only when there is one
							if (anaType.startsWith("#")) {
								anaType = anaType.substring(1)
							}
							// replace only when the record really has a value for this
							// property, otherwise the original content would be lost
							replaceAnnotation = properties.contains(anaType) && data.containsKey(anaType)
							break;
						}
					}
				}
				break;
		}

		super.processStartElement();
	}

	/**
	 * Copies the character data, except inside an {@code <ana>} or
	 * {@code <form>} element whose content is being replaced.
	 */
	@Override
	protected void processCharacters() {
		if (start && replaceAnnotation) return; // don't write annotation chars if we are currently replacing
		if (newform != null) return; // the replacement form is written when </form> is reached

		super.processCharacters();
	}

	/**
	 * Writes the replacement content of the element being closed (if any),
	 * then lets the parent class write the end tag.
	 */
	@Override
	protected void processEndElement() {
		if (start) { // write the new content of the <w> tags
			switch (localname) {
				case "w":
					start = false
					if (data.size() > 0) writer.writeCharacters("\n");
					if (debug && data.size() > 0) println "new ana: " +data
					// write remaining properties, i.e. those not consumed by a
					// <form> or <ana> replacement (1)
					for (def prop : data.keySet()) {
						writer.writeStartElement(TXMNS, "ana")
						writer.writeAttribute("resp", "txm")
						writer.writeAttribute("type", "#$prop")
						writer.writeCharacters(data.get(prop))
						writer.writeEndElement()
						writer.writeCharacters("\n")
						if (debug) println " create annotation $prop with "+data.get(prop)
					}
					break;
				case "form":
					if (newform != null) {
						// the original text was skipped in processCharacters()
						writer.writeCharacters(newform)
						if (debug) println " replace form content with "+newform
						data.remove("word") // written here, must not be written again in (1)
					}
					newform = null
					break;
				case "ana":
					if (replaceAnnotation) {
						writer.writeCharacters(data.get(anaType))
						if (debug) println " replace annotation $anaType content with "+data.get(anaType)
						data.remove(anaType) // written here, must not be written again in (1)
					}
					anaType = ""
					replaceAnnotation = false;
					break;
			}
		}
		super.processEndElement();
	}

	/**
	 * Developer smoke test: injects a few hard-coded records into a local file.
	 *
	 * @param args the arguments (unused)
	 */
	public static void main(String[] args) {

		File xmlFile = new File("/home/mdecorde/TEMP/annotation/01_DeGaulle.xml")
		def properties = ["p1", "lemma"]
		def records = [
			"w_1":["p1":"a", "lemma":"b"],
			"w_224":["p1":"c", "lemma":"d"],
			"w_400":["p1":"e", "lemma":"f"],
			"w_750":["p1":"g", "lemma":"h"],
			"w_753":["p1":"i", "lemma":"j"],
			"w_756":["p1":"k", "lemma":"l"]
		]

		def builder = new AnnotationInjectionFilter(xmlFile, records, properties);
		builder.process(new File("/home/mdecorde/TEMP", "rez.xml"));
	}
}