Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / WriteIdAndNAttributes.groovy @ 2473

History | View | Annotate | Download (5.7 kB)

1
package org.txm.scripts.importer
2

    
3
import org.txm.importer.StaxIdentityParser
4

    
5
/**
 * StaxIdentityParser subclass that rewrites the attributes of start elements so
 * that the following elements carry a generated "xml:id" and "n" attribute when
 * the source does not provide them:
 * <ul>
 * <li>milestone elements whose "unit" attribute is "surface"</li>
 * <li>pb, cb and lb elements</li>
 * <li>w and pc elements</li>
 * <li>seg elements whose "type" attribute is "wp"</li>
 * </ul>
 * An existing "id" attribute is never replaced. All other attributes are copied
 * unchanged. The instance keeps running counters, so it is stateful and meant to
 * process one document from start to end.
 */
class WriteIdAndNAttributes extends StaxIdentityParser {

    /** Text name embedded in the generated identifiers. */
    String textname

    // Next number to hand out for each kind of element; all of them start at 1.
    int nMileStone = 1, nPb = 1, nCb = 1, nLb = 1, nW = 1, nSeg = 1, nRejet = 1;
    // Identifier (or identifier stem) of the last milestone, pb, cb and w/pc seen;
    // the pb, cb and w values are used to build the ids of nested elements.
    String previousMileStone, previousPb, previousCb, previousW;

    // Element names, attribute names and attribute values compared against below.
    String PB = "pb", CB = "cb", LB = "lb", ID = "id", TYPE = "type",
    N = "n", CORRESP = "corresp", FACS="facs", W="w", PC="pc", SEG="seg",
    UNIT="unit", XML="xml", WP="wp", SURFACE="surface", POINT = ".", REJET ="rejet";

    /**
     * @param xmlFile the XML file to process, handed to the parent constructor
     * @param textname the text name inserted in the generated identifiers
     */
    public WriteIdAndNAttributes(File xmlFile, String textname) {
        super(xmlFile);

        this.textname = textname
    }

    /**
     * Intentionally empty: processStartElement() of this class writes the
     * attributes itself, after the id/n/corresp values have been computed.
     */
    protected void writeAttributes() {
        // do nothing
    }

    /**
     * Parses a decimal integer.
     *
     * @param value the text to parse (callers only pass non-null values)
     * @return the parsed number, or null when value is not a valid integer
     */
    private static Integer parseIntOrNull(String value) {
        try {
            return Integer.parseInt(value)
        } catch (NumberFormatException ignored) {
            // a non-numeric "n" is legal: the caller keeps the declared value as it is
            return null
        }
    }

    /**
     * Handles a start element: reads its id, n, type, corresp and facs
     * attributes, lets the parent class process the element, updates the
     * counters for the element kind, then writes every attribute, with id
     * (as xml:id), n and corresp replaced by the computed values.
     *
     * @throws Exception if a milestone with unit="surface" has neither an id nor a facs attribute
     */
    protected void processStartElement() {
        String id = null
        String n = null
        String type = null
        String corresp = null
        String facs = null

        // Collect the attributes this method may reuse or regenerate.
        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
            String attributeName = parser.getAttributeLocalName(i)
            if (attributeName == ID) {
                id = parser.getAttributeValue(i)
            } else if (attributeName == N) {
                n = parser.getAttributeValue(i)
            } else if (attributeName == TYPE) {
                type = parser.getAttributeValue(i)
            } else if (attributeName == CORRESP) {
                corresp = parser.getAttributeValue(i)
            } else if (attributeName == FACS) {
                facs = parser.getAttributeValue(i)
            }
        }

        // writeAttributes() is overridden above as a no-op, so the attributes are
        // not written here; presumably the parent writes the start tag itself.
        super.processStartElement();

        if (localname == "milestone" && parser.getAttributeValue(null, UNIT) == SURFACE) {
            if (n == null) {
                n = nMileStone
            } else {
                // A non-numeric n is kept unchanged, like in the pb/cb/w/seg branches.
                Integer declared = parseIntOrNull(n)
                if (declared != null) {
                    if (nMileStone < declared) {
                        // the declared number is ahead of the counter: follow it
                        nMileStone = declared
                        n = declared
                    } else {
                        n = nMileStone;
                    }
                }
            }

            if (id == null) { // don't rewrite "id"
                if (facs == null) {
                    println "Error: found milestone@unit=\"surface\" with no @facs at "+parser.getLocation()
                    throw new Exception("no facs attribute")
                }
                // drop what follows the last "." of facs (no change if there is no "." or it is the first character)
                if (facs.lastIndexOf(POINT) > 0) facs = facs.substring(0, facs.lastIndexOf(POINT))
                previousMileStone = "surf_$textname"+"_"+facs
                id = previousMileStone
            } else {
                previousMileStone = id
            }

            nMileStone++
        } else if (localname == PB) {
            if (n == null) {
                n = nPb
            } else {
                Integer declared = parseIntOrNull(n)
                if (declared != null) {
                    if (nPb < declared) {
                        nPb = declared
                    } else {
                        n = nPb;
                    }
                }
            }

            if (id == null) { // don't rewrite "id"
                id = "page_${textname}_"+nPb
                previousPb = "${textname}_"+nPb
            } else {
                previousPb = id
            }

            // a new page restarts column, line and rejet numbering
            // NOTE(review): previousCb is not reset here, so lb elements of a page
            // without cb reuse the column stem of an earlier page - confirm intended.
            nRejet = nLb = nCb = 1
            nPb++
        } else if (localname == CB) {
            if (n == null) {
                n = nCb
            } else {
                Integer declared = parseIntOrNull(n)
                if (declared != null) {
                    if (nCb < declared) {
                        nCb = declared
                    } else {
                        n = nCb;
                    }
                }
            }

            if (id == null) { // don't rewrite "id"
                previousCb = "${previousPb}_"+nCb
                id = "col_${previousPb}_"+nCb
            } else {
                previousCb = id
            }

            // a new column restarts line and rejet numbering
            nLb = nRejet = 1
            nCb++
        } else if (localname == LB) {
            if (n == null) {
                if (REJET == type) {
                    println "Warning: no 'n' attribute provided for 'rejet' line break at "+parser.getLocation()
                }

                n = nLb
            } else {
                if (REJET != type) {
                    // if n is numeric, nLb follows it; otherwise n is used as it is
                    Integer declared = parseIntOrNull(n)
                    if (declared != null) {
                        nLb = declared;
                    }
                }
            }

            if (id == null) { // don't rewrite "id"
                if (REJET == type) {
                    id = "line_${previousCb}_"+n+"_r"+nRejet
                } else {
                    id = "line_${previousCb}_"+n
                }
            }

            if (corresp == null && REJET == type) {
                //println "Warning: no 'corresp' attribute provided for 'rejet' line break at "+parser.getLocation()
                // default target: the id pattern of the line numbered n in the same column
                corresp = "#line_${previousCb}_"+n;
            }

            // 'rejet' line breaks have their own counter and do not advance nLb
            if (REJET != type) {
                nLb++;
            } else {
                nRejet++;
            }

        } else if (localname == W || localname == PC) {
            if (n == null) {
                n = nW
            } else {
                Integer declared = parseIntOrNull(n)
                if (declared != null) {
                    if (nW < declared) {
                        nW = declared
                    } else {
                        n = nW;
                    }
                }
            }

            if (id == null) { // don't rewrite "id"
                id = localname+"_${textname}_"+nW
            }
            previousW = id
            // seg numbering restarts after each word or punctuation element
            nSeg = 1
            nW++
        } else if (localname == SEG && WP == type) {
            if (n == null) {
                n = nSeg
            } else {
                Integer declared = parseIntOrNull(n)
                if (declared != null) {
                    if (nSeg < declared) {
                        nSeg = declared
                    } else {
                        n = nSeg;
                    }
                }
            }

            if (id == null) { // don't rewrite "id"
                id = "w_p_"+previousW+"_"+nSeg
            }
            nSeg++
        }

        // write attributes except ID, N and CORRESP
        for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
            String attributeName = parser.getAttributeLocalName(i)
            if (attributeName != ID && attributeName != N && attributeName != CORRESP) {
                writeAttribute(parser.getAttributePrefix(i), attributeName, parser.getAttributeValue(i))
            }
        }

        // then write the computed (or preserved) values
        if (id != null) {
            writeAttribute(XML, ID, id)
        }
        if (n != null) {
            writeAttribute(null, N, n)
        }
        if (corresp != null) {
            writeAttribute(null, CORRESP, corresp)
        }
    }

    /**
     * Developer test entry point: processes a hard-coded file, prints the value
     * returned by process(), then writes copies of the input and of the output
     * with a line break inserted after each ">" so that they can be compared.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File xmlFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28.xml")
        File outFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o.xml")

        WriteIdAndNAttributes wiana = new WriteIdAndNAttributes(xmlFile, "qgraal_cm")
        println wiana.process(outFile)

        String text = xmlFile.getText().replaceAll(">", ">\n");
        new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-p.xml").write(text);
        text = outFile.getText().replaceAll(">", ">\n");
        new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o-p.xml").write(text);
    }
}