Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / WriteIdAndNAttributes.groovy @ 2473

History | View | Annotate | Download (5.7 kB)

1 1000 mdecorde
package org.txm.scripts.importer
2 881 mdecorde
3 986 mdecorde
import org.txm.importer.StaxIdentityParser
4 986 mdecorde
5 881 mdecorde
/**
 * Identity XML rewriter that normalises the "id", "n" and "corresp" attributes
 * of start elements while copying every other attribute unchanged.
 *
 * Elements given special treatment (missing values are generated from running
 * counters): milestone[@unit="surface"], pb, cb, lb, w, pc and seg[@type="wp"].
 * For every element, an existing attribute whose local name is "id" is written
 * back with the "xml" prefix; "n" and "corresp" are written back without prefix.
 *
 * An "id" already present in the input is never replaced.
 */
class WriteIdAndNAttributes extends StaxIdentityParser {

    /** Text name embedded in the generated surface, page and word identifiers. */
    String textname

    // Running counters, all starting at 1.
    // nCb, nLb and nRejet restart at each pb; nLb and nRejet restart at each cb;
    // nSeg restarts at each w/pc. nMileStone, nPb and nW are never reset.
    int nMileStone = 1, nPb = 1, nCb = 1, nLb = 1, nW = 1, nSeg = 1, nRejet = 1;

    // Identifier (or identifier suffix) of the last milestone, pb, cb and w/pc seen;
    // previousPb, previousCb and previousW are used to build the ids of nested elements.
    String previousMileStone, previousPb, previousCb, previousW;

    // Element names, attribute names and attribute values used by processStartElement()
    String PB = "pb", CB = "cb", LB = "lb", ID = "id", TYPE = "type",
    N = "n", CORRESP = "corresp", FACS="facs", W="w", PC="pc", SEG="seg",
    UNIT="unit", XML="xml", WP="wp", SURFACE="surface", POINT = ".", REJET ="rejet";

    /**
     * @param xmlFile the XML file to process, handed to the parent parser
     * @param textname the text name used when generating identifiers
     */
    public WriteIdAndNAttributes(File xmlFile, String textname) {
        super(xmlFile);

        this.textname = textname
    }

    /**
     * Intentionally empty: attributes are written by processStartElement() itself.
     */
    protected void writeAttributes() {
        // do nothing
    }

    /**
     * Writes the current start element, computing its "id", "n" and "corresp"
     * attributes and copying all its other attributes as they are.
     *
     * A provided numeric "n" greater than the matching counter moves the counter
     * forward; otherwise the counter value replaces it (for lb, a numeric "n" always
     * sets the counter unless the lb is a "rejet"). A non-numeric "n" is kept as is.
     *
     * @throws Exception if a milestone with unit="surface" has neither an "id"
     *         nor a "facs" attribute
     */
    protected void processStartElement() {
        String id = null
        String n = null
        String type = null
        String corresp = null
        String facs = null

        // collect the attributes that drive the id/n/corresp computation
        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
            String name = parser.getAttributeLocalName(i)
            if (name == ID) {
                id = parser.getAttributeValue(i)
            } else if (name == N) {
                n = parser.getAttributeValue(i)
            } else if (name == TYPE) {
                type = parser.getAttributeValue(i)
            } else if (name == CORRESP) {
                corresp = parser.getAttributeValue(i)
            } else if (name == FACS) {
                facs = parser.getAttributeValue(i)
            }
        }

        // writeAttributes() is overridden to do nothing; presumably the parent
        // implementation calls it, so no attribute has been written at this point
        super.processStartElement();

        if (localname == "milestone" && parser.getAttributeValue(null, UNIT) == SURFACE) {
            if (n == null) {
                n = nMileStone
            } else {
                try {
                    int tmp = Integer.parseInt(n)
                    if (nMileStone < tmp) {
                        nMileStone = tmp
                        n = tmp
                    } else {
                        n = nMileStone
                    }
                } catch (NumberFormatException ignored) {
                    // n is not numeric: keep it as provided, like the other elements do
                }
            }

            if (id == null) { // don't rewrite "id"
                if (facs == null) {
                    println "Error: found milestone@unit=\"surface\" with no @facs at "+parser.getLocation()
                    throw new Exception("no facs attribute")
                }
                // drop what follows the last '.' of @facs (unless it is the first character)
                if (facs.lastIndexOf(POINT) > 0) facs = facs.substring(0, facs.lastIndexOf(POINT))
                previousMileStone = "surf_${textname}_${facs}"
                id = previousMileStone
            } else {
                previousMileStone = id
            }

            nMileStone++
        } else if (localname == PB) {
            if (n == null) {
                n = nPb
            } else {
                try {
                    int tmp = Integer.parseInt(n)
                    if (nPb < tmp) {
                        nPb = tmp
                    } else {
                        n = nPb
                    }
                } catch (NumberFormatException ignored) {
                    // n is not numeric: use it as is
                }
            }

            if (id == null) { // don't rewrite "id"
                id = "page_${textname}_${nPb}"
                previousPb = "${textname}_${nPb}"
            } else {
                previousPb = id
            }

            // a new page restarts column, line and "rejet" numbering
            nRejet = nLb = nCb = 1
            nPb++
        } else if (localname == CB) {
            if (n == null) {
                n = nCb
            } else {
                try {
                    int tmp = Integer.parseInt(n)
                    if (nCb < tmp) {
                        nCb = tmp
                    } else {
                        n = nCb
                    }
                } catch (NumberFormatException ignored) {
                    // n is not numeric: use it as is
                }
            }

            if (id == null) { // don't rewrite "id"
                previousCb = "${previousPb}_${nCb}"
                id = "col_${previousPb}_${nCb}"
            } else {
                previousCb = id
            }

            // a new column restarts line and "rejet" numbering
            nLb = nRejet = 1
            nCb++
        } else if (localname == LB) {
            if (n == null) {
                if (REJET == type) {
                    println "Warning: no 'n' attribute provided for 'rejet' line break at "+parser.getLocation()
                }

                n = nLb
            } else {
                if (REJET != type) {
                    try { // if n is numeric, it becomes the current line number
                        int tmp = Integer.parseInt(n)
                        nLb = tmp
                    } catch (NumberFormatException ignored) {
                        // n is not numeric: use it as is
                    }
                }
            }

            if (id == null) { // don't rewrite "id"
                if (REJET == type) {
                    id = "line_${previousCb}_${n}_r${nRejet}"
                } else {
                    id = "line_${previousCb}_${n}"
                }
            }

            if (corresp == null && REJET == type) {
                // default target: the id a regular lb with the same n gets in this column
                corresp = "#line_${previousCb}_${n}"
            }

            if (REJET != type) {
                nLb++
            } else {
                nRejet++
            }
        } else if (localname == W || localname == PC) {
            if (n == null) {
                n = nW
            } else {
                try {
                    int tmp = Integer.parseInt(n)
                    if (nW < tmp) {
                        nW = tmp
                    } else {
                        n = nW
                    }
                } catch (NumberFormatException ignored) {
                    // n is not numeric: use it as is
                }
            }

            if (id == null) { // don't rewrite "id"
                id = "${localname}_${textname}_${nW}"
            }
            previousW = id
            nSeg = 1 // segment numbering restarts at each word
            nW++
        } else if (localname == SEG && WP == type) {
            if (n == null) {
                n = nSeg
            } else {
                try {
                    int tmp = Integer.parseInt(n)
                    if (nSeg < tmp) {
                        nSeg = tmp
                    } else {
                        n = nSeg
                    }
                } catch (NumberFormatException ignored) {
                    // n is not numeric: use it as is
                }
            }

            if (id == null) { // don't rewrite "id"
                id = "w_p_${previousW}_${nSeg}"
            }
            nSeg++
        }

        // write attributes except ID, N and CORRESP, which are written below
        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
            String name = parser.getAttributeLocalName(i)
            if (name != ID && name != N && name != CORRESP) {
                writeAttribute(parser.getAttributePrefix(i), name, parser.getAttributeValue(i))
            }
        }

        if (id != null) {
            writeAttribute(XML, ID, id)
        }
        if (n != null) {
            writeAttribute(null, N, n)
        }
        if (corresp != null) {
            writeAttribute(null, CORRESP, corresp)
        }
    }

    /**
     * Developer entry point: processes a hard-coded sample file, prints the result
     * of process(), then writes copies of the input and output files with a line
     * break inserted after every '>'.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File xmlFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28.xml")
        File outFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o.xml")

        WriteIdAndNAttributes wiana = new WriteIdAndNAttributes(xmlFile, "qgraal_cm")
        println wiana.process(outFile)

        String text = xmlFile.getText().replaceAll(">", ">\n");
        new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-p.xml").write(text);
        text = outFile.getText().replaceAll(">", ">\n");
        new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o-p.xml").write(text);
    }
}