Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / WriteIdAndNAttributes.groovy @ 187

History | View | Annotate | Download (5.7 kB)

1
package org.txm.importer
2

    
3
/**
 * StAX rewriting pass that (re)writes the {@code xml:id}, {@code n} and
 * {@code corresp} attributes of the structural elements it recognises:
 * {@code milestone[@unit="surface"]}, {@code pb}, {@code cb}, {@code lb},
 * {@code w}, {@code pc} and {@code seg[@type="wp"]}.
 *
 * An existing {@code id} is never replaced. A missing {@code n} is filled in
 * from a running counter; a numeric {@code n} is reconciled with that counter;
 * a non-numeric {@code n} is kept untouched.
 */
class WriteIdAndNAttributes extends StaxIdentityParser {

    /** Text name embedded in the generated page and word identifiers. */
    String textname

    // Running counters: each one holds the next number to hand out for its element kind.
    int nMileStone = 1, nPb = 1, nCb = 1, nLb = 1, nW = 1, nSeg = 1, nRejet = 1;

    // Identifier (or identifier stem) of the last element seen of each kind;
    // used to build the identifiers of the elements nested below it.
    String previousMileStone, previousPb, previousCb, previousW;

    // Element names, attribute names and attribute values used by the pass.
    String PB = "pb", CB = "cb", LB = "lb", ID = "id", TYPE = "type",
    N = "n", CORRESP = "corresp", FACS="facs", W="w", PC="pc", SEG="seg",
    UNIT="unit", XML="xml", WP="wp", SURFACE="surface", POINT = ".", REJET ="rejet";

    /**
     * @param xmlFile  the XML file to process (handed to the parent parser)
     * @param textname the text name used inside generated identifiers
     */
    public WriteIdAndNAttributes(File xmlFile, String textname) {
        super(xmlFile);

        this.textname = textname
    }

    /**
     * Intentionally empty: this class writes the attributes itself at the end
     * of {@link #processStartElement()}.
     */
    protected void writeAttributes() {
        // do nothing
    }

    /**
     * Parses a decimal integer without throwing.
     *
     * @param value the text to parse, may be {@code null}
     * @return the parsed value, or {@code null} when {@code value} is
     *         {@code null} or not a valid integer
     */
    private static Integer parseIntOrNull(String value) {
        if (value == null) {
            return null
        }
        try {
            return Integer.parseInt(value)
        } catch (NumberFormatException ignored) {
            // a non-numeric "n" is legitimate: the caller keeps it as is
            return null
        }
    }

    /**
     * Writes the current start element, then its attributes with
     * {@code xml:id}, {@code n} and {@code corresp} computed from the running
     * counters when the source does not provide them.
     *
     * @throws Exception when a {@code milestone[@unit="surface"]} has neither
     *                   an {@code id} nor a {@code facs} attribute
     */
    protected void processStartElement() {
        String id = null
        String n = null
        String type = null
        String corresp = null
        String facs = null

        // Collect the attributes this pass cares about (matched on local name only).
        int attributeCount = parser.getAttributeCount()
        for (int i = 0 ; i < attributeCount ; i++) {
            String attributeName = parser.getAttributeLocalName(i)
            String attributeValue = parser.getAttributeValue(i)
            if (attributeName == ID) {
                id = attributeValue
            } else if (attributeName == N) {
                n = attributeValue
            } else if (attributeName == TYPE) {
                type = attributeValue
            } else if (attributeName == CORRESP) {
                corresp = attributeValue
            } else if (attributeName == FACS) {
                facs = attributeValue
            }
        }

        // NOTE(review): presumably the parent implementation writes the start tag and
        // delegates attributes to writeAttributes(), which is a no-op here - confirm.
        super.processStartElement();

        // Numeric value of the declared @n, or null when @n is absent or not numeric.
        Integer declaredN = parseIntOrNull(n)
        boolean isRejet = (REJET == type)

        if (localname == "milestone" && parser.getAttributeValue(null, UNIT) == SURFACE) {
            if (n == null) {
                n = String.valueOf(nMileStone)
            } else if (declaredN != null) {
                // The counter never goes backwards; @n is rewritten to the counter value.
                if (nMileStone < declaredN) {
                    nMileStone = declaredN
                }
                n = String.valueOf(nMileStone)
            }
            // else: non-numeric @n is kept as is, like for the other elements

            if (id == null) { // don't rewrite "id"
                if (facs == null) {
                    println "Error: found milestone@unit=\"surface\" with no @facs at "+parser.getLocation()
                    throw new Exception("no facs attribute")
                }
                // Drop the last ".xxx" suffix of @facs (a leading dot is kept).
                int lastPoint = facs.lastIndexOf(POINT)
                if (lastPoint > 0) {
                    facs = facs.substring(0, lastPoint)
                }
                previousMileStone = "surf_$textname"+"_"+facs
                id = previousMileStone
            } else {
                previousMileStone = id
            }

            nMileStone++
        } else if (localname == PB) {
            if (n == null) {
                n = String.valueOf(nPb)
            } else if (declaredN != null) {
                if (nPb < declaredN) {
                    nPb = declaredN
                } else {
                    n = String.valueOf(nPb)
                }
            }

            if (id == null) { // don't rewrite "id"
                id = "page_${textname}_"+nPb
                previousPb = "${textname}_"+nPb
            } else {
                previousPb = id
            }

            // A new page restarts column, line and "rejet" numbering.
            nRejet = nLb = nCb = 1
            nPb++
        } else if (localname == CB) {
            if (n == null) {
                n = String.valueOf(nCb)
            } else if (declaredN != null) {
                if (nCb < declaredN) {
                    nCb = declaredN
                } else {
                    n = String.valueOf(nCb)
                }
            }

            if (id == null) { // don't rewrite "id"
                previousCb = "${previousPb}_"+nCb
                id = "col_${previousPb}_"+nCb
            } else {
                previousCb = id
            }

            // A new column restarts line and "rejet" numbering.
            nLb = nRejet = 1
            nCb++
        } else if (localname == LB) {
            if (n == null) {
                if (isRejet) {
                    println "Warning: no 'n' attribute provided for 'rejet' line break at "+parser.getLocation()
                }

                n = String.valueOf(nLb)
            } else if (!isRejet && declaredN != null) {
                // A numeric @n on a regular line break resynchronises the line counter.
                nLb = declaredN
            }

            if (id == null) { // don't rewrite "id"
                if (isRejet) {
                    id = "line_${previousCb}_"+n+"_r"+nRejet
                } else {
                    id = "line_${previousCb}_"+n
                }
            }

            if (corresp == null && isRejet) {
                // Default target: the regular line of the same column carrying this number.
                corresp = "#line_${previousCb}_"+n;
            }

            // "rejet" line breaks have their own counter and do not consume a line number.
            if (isRejet) {
                nRejet++;
            } else {
                nLb++;
            }
        } else if (localname == W || localname == PC) {
            if (n == null) {
                n = String.valueOf(nW)
            } else if (declaredN != null) {
                if (nW < declaredN) {
                    nW = declaredN
                } else {
                    n = String.valueOf(nW)
                }
            }

            if (id == null) { // don't rewrite "id"
                id = localname+"_${textname}_"+nW
            }
            previousW = id
            nSeg = 1 // segments are numbered within their word
            nW++
        } else if (localname == SEG && WP == type) {
            if (n == null) {
                n = String.valueOf(nSeg)
            } else if (declaredN != null) {
                if (nSeg < declaredN) {
                    nSeg = declaredN
                } else {
                    n = String.valueOf(nSeg)
                }
            }

            if (id == null) { // don't rewrite "id"
                id = "w_p_"+previousW+"_"+nSeg
            }
            nSeg++
        }

        // Copy every attribute except ID, N and CORRESP, which are written below.
        for (int i = 0 ; i < attributeCount ; i++) {
            String attributeName = parser.getAttributeLocalName(i)
            if (attributeName != ID && attributeName != N && attributeName != CORRESP) {
                writeAttribute(parser.getAttributePrefix(i), attributeName, parser.getAttributeValue(i))
            }
        }

        if (id != null) {
            writeAttribute(XML, ID, id)
        }
        if (n != null) {
            writeAttribute(null, N, n)
        }
        if (corresp != null) {
            writeAttribute(null, CORRESP, corresp)
        }
    }

    /**
     * Developer entry point: processes a hard-coded sample file, then writes
     * copies of the input and of the output with a line break after each
     * {@code >} to ease comparison.
     */
    public static void main(String[] args) {
        File xmlFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28.xml")
        File outFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o.xml")

        WriteIdAndNAttributes wiana = new WriteIdAndNAttributes(xmlFile, "qgraal_cm")
        println wiana.process(outFile)

        String text = xmlFile.getText().replaceAll(">", ">\n");
        new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-p.xml").write(text);
        text = outFile.getText().replaceAll(">", ">\n");
        new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o-p.xml").write(text);
    }
}