root / tmp / org.txm.core / src / java / org / txm / scripts / importer / WriteIdAndNAttributes.groovy @ 2473
History | View | Annotate | Download (5.7 kB)
1 | 1000 | mdecorde | package org.txm.scripts.importer
|
---|---|---|---|
2 | 881 | mdecorde | |
3 | 986 | mdecorde | import org.txm.importer.StaxIdentityParser |
4 | 986 | mdecorde | |
/**
 * Rewriting pass that guarantees an {@code xml:id} and an {@code n} attribute on
 * the elements it knows about while copying every other attribute unchanged.
 *
 * Handled start elements:
 * <ul>
 * <li>{@code milestone} whose {@code unit} attribute is {@code "surface"}</li>
 * <li>{@code pb}, {@code cb}, {@code lb}</li>
 * <li>{@code w} and {@code pc}</li>
 * <li>{@code seg} whose {@code type} attribute is {@code "wp"}</li>
 * </ul>
 *
 * An already present {@code id} is never replaced. A numeric {@code n} that is
 * greater than the running counter moves the counter forward; a numeric
 * {@code n} that is lower or equal is replaced by the counter value (except for
 * {@code lb}, where a numeric {@code n} always resets the line counter).
 * A non numeric {@code n} is written back untouched.
 *
 * NOTE(review): {@code parser}, {@code localname}, {@code writeAttribute} and
 * {@code process} are not declared here; they are presumably inherited from
 * {@code StaxIdentityParser} - confirm against that class.
 */
class WriteIdAndNAttributes extends StaxIdentityParser {

	/** Name of the text, injected in the generated page and word identifiers. */
	String textname

	/** Running counters; each one holds the value the NEXT element of that kind receives. */
	int nMileStone = 1, nPb = 1, nCb = 1, nLb = 1, nW = 1, nSeg = 1, nRejet = 1;

	/** Identifier parts remembered from the last milestone, page, column and word seen. */
	String previousMileStone, previousPb, previousCb, previousW;

	// element names, attribute names and attribute values compared against the stream
	String PB = "pb", CB = "cb", LB = "lb", ID = "id", TYPE = "type",
	N = "n", CORRESP = "corresp", FACS = "facs", W = "w", PC = "pc", SEG = "seg",
	UNIT = "unit", XML = "xml", WP = "wp", SURFACE = "surface", POINT = ".", REJET = "rejet";

	/**
	 * @param xmlFile the XML file to read, handed to the parent constructor
	 * @param textname the text name used when building identifiers
	 */
	public WriteIdAndNAttributes(File xmlFile, String textname) {
		super(xmlFile);

		this.textname = textname
	}

	/**
	 * Intentionally empty: attributes are written by {@link #processStartElement()}
	 * itself, once the id/n/corresp values have been computed.
	 */
	protected void writeAttributes() {
		// do nothing
	}

	/**
	 * Parses a decimal integer without throwing.
	 *
	 * @param value the text to parse, may be null
	 * @return the parsed value, or null when {@code value} is null or not an integer
	 */
	private static Integer parseIntOrNull(String value) {
		if (value == null) {
			return null
		}
		try {
			return Integer.parseInt(value)
		} catch (NumberFormatException ignored) {
			// not numeric: the caller keeps the attribute value as it is
			return null
		}
	}

	/**
	 * Computes the {@code id}, {@code n} and (for "rejet" line breaks)
	 * {@code corresp} values of the current start element, then writes all the
	 * attributes: the untouched ones first, followed by {@code xml:id},
	 * {@code n} and {@code corresp} when they are not null.
	 *
	 * @throws Exception when a surface milestone has neither an id nor a
	 *         {@code facs} attribute to build one from
	 */
	protected void processStartElement() {
		String id = null
		String n = null
		String type = null
		String corresp = null
		String facs = null

		// collect the attributes this pass needs (matched on local name only;
		// Groovy == on Strings is an equals() comparison)
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
			String attrName = parser.getAttributeLocalName(i)
			if (attrName == ID) {
				id = parser.getAttributeValue(i)
			} else if (attrName == N) {
				n = parser.getAttributeValue(i)
			} else if (attrName == TYPE) {
				type = parser.getAttributeValue(i)
			} else if (attrName == CORRESP) {
				corresp = parser.getAttributeValue(i)
			} else if (attrName == FACS) {
				facs = parser.getAttributeValue(i)
			}
		}

		super.processStartElement(); // attributes are not written because super.writeAttributes() is not called

		if (localname == "milestone" && parser.getAttributeValue(null, UNIT) == SURFACE) {
			if (n == null) {
				n = String.valueOf(nMileStone)
			} else {
				// a non numeric n is kept as it is, like for the other elements,
				// instead of aborting the whole run with a NumberFormatException
				Integer declared = parseIntOrNull(n)
				if (declared != null) {
					if (nMileStone < declared) {
						nMileStone = declared
						n = String.valueOf(declared)
					} else {
						n = String.valueOf(nMileStone)
					}
				}
			}

			if (id == null) { // don't rewrite "id"
				if (facs == null) {
					println "Error: found milestone@unit=\"surface\" with no @facs at "+parser.getLocation()
					throw new Exception("no facs attribute")
				}
				// drop what follows the last dot of facs (a dot at index 0 is left alone)
				if (facs.lastIndexOf(POINT) > 0) facs = facs.substring(0, facs.lastIndexOf(POINT))
				previousMileStone = "surf_${textname}_" + facs
				id = previousMileStone
			} else {
				previousMileStone = id
			}

			nMileStone++
		} else if (localname == PB) {
			if (n == null) {
				n = String.valueOf(nPb)
			} else {
				Integer declared = parseIntOrNull(n)
				if (declared != null) {
					if (nPb < declared) {
						nPb = declared
					} else {
						n = String.valueOf(nPb)
					}
				}
			}

			if (id == null) { // don't rewrite "id"
				id = "page_${textname}_" + nPb
				previousPb = "${textname}_" + nPb
			} else {
				previousPb = id
			}

			// a new page restarts the column, line and "rejet" numbering
			nRejet = nLb = nCb = 1
			nPb++
		} else if (localname == CB) {
			if (n == null) {
				n = String.valueOf(nCb)
			} else {
				Integer declared = parseIntOrNull(n)
				if (declared != null) {
					if (nCb < declared) {
						nCb = declared
					} else {
						n = String.valueOf(nCb)
					}
				}
			}

			if (id == null) { // don't rewrite "id"
				previousCb = "${previousPb}_" + nCb
				id = "col_${previousPb}_" + nCb
			} else {
				previousCb = id
			}

			// a new column restarts the line and "rejet" numbering
			nLb = nRejet = 1
			nCb++
		} else if (localname == LB) {
			if (n == null) {
				if (REJET == type) {
					println "Warning: no 'n' attribute provided for 'rejet' line break at "+parser.getLocation()
				}

				n = String.valueOf(nLb)
			} else if (REJET != type) {
				// a numeric n resets the line counter; a non numeric n is used as it is
				Integer declared = parseIntOrNull(n)
				if (declared != null) {
					nLb = declared
				}
			}

			if (id == null) { // don't rewrite "id"
				if (REJET == type) {
					id = "line_${previousCb}_" + n + "_r" + nRejet
				} else {
					id = "line_${previousCb}_" + n
				}
			}

			if (corresp == null && REJET == type) {
				//println "Warning: no 'corresp' attribute provided for 'rejet' line break at "+parser.getLocation()
				// default target: the id a non "rejet" line with the same n gets in the current column
				corresp = "#line_${previousCb}_" + n;
			}

			// "rejet" line breaks have their own counter and do not consume a line number
			if (REJET != type) {
				nLb++;
			} else {
				nRejet++;
			}

		} else if (localname == W || localname == PC) {
			if (n == null) {
				n = String.valueOf(nW)
			} else {
				Integer declared = parseIntOrNull(n)
				if (declared != null) {
					if (nW < declared) {
						nW = declared
					} else {
						n = String.valueOf(nW)
					}
				}
			}

			if (id == null) { // don't rewrite "id"
				//previousW = "${previousPb}_"+nW
				id = localname + "_${textname}_" + nW
			}
			previousW = id
			nSeg = 1 // segment numbering restarts with each word
			nW++
		} else if (localname == SEG && WP == type) {
			if (n == null) {
				n = String.valueOf(nSeg)
			} else {
				Integer declared = parseIntOrNull(n)
				if (declared != null) {
					if (nSeg < declared) {
						nSeg = declared
					} else {
						n = String.valueOf(nSeg)
					}
				}
			}

			if (id == null) { // don't rewrite "id"
				id = "w_p_" + previousW + "_" + nSeg
			}
			nSeg++
		}

		// write attributes except ID, N and CORRESP
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
			String attrName = parser.getAttributeLocalName(i)
			boolean rewritten = attrName == ID || attrName == N || attrName == CORRESP
			if (!rewritten) {
				writeAttribute(parser.getAttributePrefix(i), attrName, parser.getAttributeValue(i))
			}
		}
		//if (localname == "lb") println "write id: "+["xml", "id", id]
		if (id != null) {
			writeAttribute(XML, ID, id)
		}
		if (n != null) {
			writeAttribute(null, N, n)
		}
		if (corresp != null) {
			writeAttribute(null, CORRESP, corresp)
		}
	}

	/**
	 * Developer entry point: processes one hard-coded file, prints the value
	 * returned by {@code process}, then writes a copy of the input and of the
	 * output with a line break inserted after every {@code >}.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		File xmlFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28.xml")
		File outFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o.xml")

		WriteIdAndNAttributes wiana = new WriteIdAndNAttributes(xmlFile, "qgraal_cm")
		println wiana.process(outFile)

		String text = xmlFile.getText().replaceAll(">", ">\n");
		new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-p.xml").write(text);
		text = outFile.getText().replaceAll(">", ">\n");
		new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o-p.xml").write(text);
	}
}