root / tmp / org.txm.core / src / java / org / txm / scripts / importer / WriteIdAndNAttributes.groovy @ 2473
History | View | Annotate | Download (5.7 kB)
1 |
package org.txm.scripts.importer
|
---|---|
2 |
|
3 |
import org.txm.importer.StaxIdentityParser |
4 |
|
5 |
/**
 * StAX rewriter that copies an XML file while adding or rewriting the
 * {@code xml:id}, {@code n} and {@code corresp} attributes of the following
 * elements: {@code milestone} (with {@code @unit="surface"}), {@code pb},
 * {@code cb}, {@code lb}, {@code w}, {@code pc} and {@code seg} (with
 * {@code @type="wp"}).
 *
 * An existing {@code id} attribute is never replaced. Missing ids are built
 * from the text name and from running counters kept by this instance.
 * All other attributes are copied unchanged.
 */
class WriteIdAndNAttributes extends StaxIdentityParser {

	/** Name of the text being processed; used as a component of generated ids. */
	String textname

	/**
	 * Running counters, one per numbered element kind. nLb, nCb and nRejet are
	 * reset to 1 on each pb; nLb and nRejet on each cb; nSeg on each w/pc.
	 */
	int nMileStone = 1, nPb = 1, nCb = 1, nLb = 1, nW = 1, nSeg = 1, nRejet = 1;

	/** Identifiers remembered from the last milestone, pb, cb and w/pc seen. */
	String previousMileStone, previousPb, previousCb, previousW;

	// Element names, attribute names and attribute values used by the rewriter.
	String PB = "pb", CB = "cb", LB = "lb", ID = "id", TYPE = "type",
	N = "n", CORRESP = "corresp", FACS="facs", W="w", PC="pc", SEG="seg",
	UNIT="unit", XML="xml", WP="wp", SURFACE="surface", POINT = ".", REJET ="rejet";

	/**
	 * @param xmlFile the XML file to read (handed to the parent parser)
	 * @param textname the text name inserted in generated ids
	 */
	public WriteIdAndNAttributes(File xmlFile, String textname) {
		super(xmlFile);

		this.textname = textname
	}

	/**
	 * Intentionally empty: attributes are written by
	 * {@link #processStartElement()} so that id, n and corresp can be replaced.
	 */
	protected void writeAttributes() {
		// do nothing
	}

	/**
	 * Parses a decimal integer.
	 *
	 * @param value the text to parse, may be null
	 * @return the parsed value, or null when {@code value} is not a valid integer
	 */
	private static Integer parseIntOrNull(String value) {
		try {
			return Integer.parseInt(value)
		} catch (NumberFormatException ignored) {
			// a non-numeric (or null) value is an expected case, not an error
			return null
		}
	}

	/**
	 * Writes the current start element, then its attributes with computed
	 * values for id, n and corresp.
	 *
	 * For the numbered elements, a missing {@code n} takes the value of the
	 * matching counter. A numeric {@code n} greater than the counter moves the
	 * counter forward; a numeric {@code n} that is not greater is replaced by
	 * the counter (except for lb, where a numeric {@code n} of a non-"rejet"
	 * line always resets the counter). A non-numeric {@code n} is kept as is.
	 *
	 * @throws Exception if a surface milestone has neither an id nor a facs attribute
	 */
	protected void processStartElement() {
		String id = null
		String n = null
		String type = null
		String corresp = null
		String facs = null

		// collect the attribute values the numbering logic depends on
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
			String attName = parser.getAttributeLocalName(i)
			if (attName == ID) {
				id = parser.getAttributeValue(i)
			} else if (attName == N) {
				n = parser.getAttributeValue(i)
			} else if (attName == TYPE) {
				type = parser.getAttributeValue(i)
			} else if (attName == CORRESP) {
				corresp = parser.getAttributeValue(i)
			} else if (attName == FACS) {
				facs = parser.getAttributeValue(i)
			}
		}

		super.processStartElement(); // attributes are not written because super.writeAttributes() is not called

		if (localname == "milestone" && parser.getAttributeValue(null, UNIT) == SURFACE) {
			if (n == null) {
				n = Integer.toString(nMileStone)
			} else {
				Integer given = parseIntOrNull(n)
				if (given != null) {
					if (nMileStone < given) {
						nMileStone = given
						n = Integer.toString(given)
					} else {
						n = Integer.toString(nMileStone)
					}
				}
				// a non-numeric n is kept unchanged, as for pb, cb, w and seg
			}

			if (id == null) { // don't rewrite "id"
				if (facs == null) {
					println "Error: found milestone@unit=\"surface\" with no @facs at "+parser.getLocation()
					throw new Exception("no facs attribute")
				}
				// drop the part after the last '.' of the facs value, if any
				int lastPoint = facs.lastIndexOf(POINT)
				if (lastPoint > 0) facs = facs.substring(0, lastPoint)
				previousMileStone = "surf_${textname}_" + facs
				id = previousMileStone
			} else {
				previousMileStone = id
			}

			nMileStone++
		} else if (localname == PB) {
			if (n == null) {
				n = Integer.toString(nPb)
			} else {
				Integer given = parseIntOrNull(n)
				if (given != null) {
					if (nPb < given) {
						nPb = given
					} else {
						n = Integer.toString(nPb)
					}
				}
			}

			if (id == null) { // don't rewrite "id"
				id = "page_${textname}_" + nPb
				previousPb = "${textname}_" + nPb
			} else {
				previousPb = id
			}

			// a new page restarts column, line and "rejet" numbering
			nRejet = nLb = nCb = 1
			nPb++
		} else if (localname == CB) {
			if (n == null) {
				n = Integer.toString(nCb)
			} else {
				Integer given = parseIntOrNull(n)
				if (given != null) {
					if (nCb < given) {
						nCb = given
					} else {
						n = Integer.toString(nCb)
					}
				}
			}

			if (id == null) { // don't rewrite "id"
				previousCb = "${previousPb}_" + nCb
				id = "col_${previousPb}_" + nCb
			} else {
				previousCb = id
			}

			// a new column restarts line and "rejet" numbering
			nLb = nRejet = 1
			nCb++
		} else if (localname == LB) {
			if (n == null) {
				if (REJET == type) {
					println "Warning: no 'n' attribute provided for 'rejet' line break at "+parser.getLocation()
				}

				n = Integer.toString(nLb)
			} else if (REJET != type) {
				// a numeric n resets the line counter; a non-numeric n is used as is
				Integer given = parseIntOrNull(n)
				if (given != null) {
					nLb = given
				}
			}

			if (id == null) { // don't rewrite "id"
				if (REJET == type) {
					id = "line_${previousCb}_" + n + "_r" + nRejet
				} else {
					id = "line_${previousCb}_" + n
				}
			}

			if (corresp == null && REJET == type) {
				//println "Warning: no 'corresp' attribute provided for 'rejet' line break at "+parser.getLocation()
				corresp = "#line_${previousCb}_" + n; // next word id in the same column
			}

			// "rejet" line breaks have their own counter and do not advance nLb
			if (REJET != type) {
				nLb++;
			} else {
				nRejet++;
			}

		} else if (localname == W || localname == PC) {
			if (n == null) {
				n = Integer.toString(nW)
			} else {
				Integer given = parseIntOrNull(n)
				if (given != null) {
					if (nW < given) {
						nW = given
					} else {
						n = Integer.toString(nW)
					}
				}
			}

			if (id == null) { // don't rewrite "id"
				id = localname + "_${textname}_" + nW
			}
			previousW = id
			nSeg = 1 // segments are numbered within their word
			nW++
		} else if (localname == SEG && WP == type) {
			if (n == null) {
				n = Integer.toString(nSeg)
			} else {
				Integer given = parseIntOrNull(n)
				if (given != null) {
					if (nSeg < given) {
						nSeg = given
					} else {
						n = Integer.toString(nSeg)
					}
				}
			}

			if (id == null) { // don't rewrite "id"
				id = "w_p_" + previousW + "_" + nSeg
			}
			nSeg++
		}

		// write attributes except ID, N and CORRESP
		for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
			String attName = parser.getAttributeLocalName(i)
			if (attName != ID && attName != N && attName != CORRESP) {
				writeAttribute(parser.getAttributePrefix(i), attName, parser.getAttributeValue(i))
			}
		}

		// then write the (possibly computed) id, n and corresp values
		if (id != null) {
			writeAttribute(XML, ID, id)
		}
		if (n != null) {
			writeAttribute(null, N, n)
		}
		if (corresp != null) {
			writeAttribute(null, CORRESP, corresp)
		}
	}

	/**
	 * Runs the rewriter on a hard-coded sample file, then writes copies of the
	 * input and of the output with a line break inserted after each '>'.
	 * NOTE(review): the paths are machine specific; this looks like an ad-hoc
	 * debugging entry point - confirm before relying on it.
	 */
	public static void main(String[] args) {
		File xmlFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28.xml")
		File outFile = new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o.xml")

		WriteIdAndNAttributes wiana = new WriteIdAndNAttributes(xmlFile, "qgraal_cm")
		println wiana.process(outFile)

		String text = xmlFile.getText().replaceAll(">", ">\n");
		new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-p.xml").write(text);
		text = outFile.getText().replaceAll(">", ">\n");
		new File("/home/mdecorde/xml/bugrejet/Psautier5-or28-o-p.xml").write(text);
	}
}