68 |
68 |
@Field @Option(name="textColumnList", usage="text columns list separated by comma", widget="String", required=false, def="textColumnList1,textColumnList2")
|
69 |
69 |
def textColumnList
|
70 |
70 |
|
|
71 |
// Boolean option: when true, the generated document is wrapped in TEI, teiHeader and text elements.
@Field @Option(name="EmbedInTEI", usage="embed the result in a TEI document (TEI, teiHeader and text elements)", widget="Boolean", required=false, def="false")
|
|
72 |
def EmbedInTEI
|
|
73 |
|
71 |
74 |
if (!ParametersDialog.open(this)) return
|
72 |
75 |
|
73 |
76 |
if (!inputFile.exists()) {
|
... | ... | |
158 |
161 |
|
159 |
162 |
writer.writeStartDocument("UTF-8","1.0")
|
160 |
163 |
writer.writeCharacters("\n") // line break after the XML declaration (simple XML formatting)
|
|
164 |
|
|
165 |
// Optional TEI envelope: when EmbedInTEI is set, open TEI, write an empty teiHeader,
// then open text, so the statements that follow are written inside the text element.
if (EmbedInTEI) {
|
|
166 |
writer.writeStartElement("TEI")
|
|
167 |
writer.writeStartElement("teiHeader")
|
|
168 |
writer.writeEndElement() // teiHeader (left empty)
|
|
169 |
writer.writeStartElement("text")
|
|
170 |
writer.writeCharacters("\n")
|
|
171 |
}
|
|
172 |
|
161 |
173 |
writer.writeStartElement(rootTag)
|
162 |
174 |
writer.writeCharacters("\n")
|
163 |
175 |
|
... | ... | |
173 |
185 |
writer.writeCharacters(" ")
|
174 |
186 |
writer.writeStartElement(textTag)
|
175 |
187 |
metadataColumnIndex.each { colIndex -> // build an attribute for each metadata
|
176 |
|
String s = getCellValueAsString(ws.getRow(rowIndex).getCell(colIndex));
|
|
188 |
def row = ws.getRow(rowIndex)
|
|
189 |
if (row != null) {
|
|
190 |
String s = getCellValueAsString(row.getCell(colIndex));
|
177 |
191 |
if (s == null) s ="";
|
178 |
192 |
value = s.replaceAll("\n", ";").trim()
|
179 |
193 |
writer.writeAttribute(normalizedHeaders[colIndex], value)
|
... | ... | |
184 |
198 |
writer.writeAttribute(normalizedHeaders[colIndex]+"mois", matches[0][2])
|
185 |
199 |
writer.writeAttribute(normalizedHeaders[colIndex]+"annee", matches[0][3])
|
186 |
200 |
}
|
|
201 |
}
|
187 |
202 |
}
|
188 |
203 |
writer.writeCharacters("\n")
|
189 |
204 |
|
... | ... | |
195 |
210 |
|
196 |
211 |
// Emit one "item" element per metadata column, with the content "header : value".
metadataColumnIndex.each { colIndex ->
	def row = ws.getRow(rowIndex)
	// No row object at rowIndex (getRow returned null): skip the item entirely.
	// Opening "item" before this check would leave it unclosed and shift every
	// following writeEndElement() onto the wrong element.
	if (row != null) {
		// A null cell value is written as an empty string, as the metadata attribute code does.
		String cellText = getCellValueAsString(row.getCell(colIndex)) ?: ""
		writer.writeStartElement("item")
		writer.writeCharacters(headers[colIndex]+" : "+cellText.replaceAll("\n", ";"))
		writer.writeEndElement() // item
		writer.writeCharacters("\n")
	}
}
|
202 |
220 |
writer.writeCharacters(" ")
|
203 |
221 |
writer.writeEndElement() // list
|
... | ... | |
214 |
232 |
writer.writeCharacters(headers[colIndex]+" : ")
|
215 |
233 |
writer.writeEndElement() // hi
|
216 |
234 |
writer.writeEndElement() // head
|
217 |
|
value = getCellValueAsString(ws.getRow(rowIndex).getCell(colIndex))
|
|
235 |
def row = ws.getRow(rowIndex)
|
|
236 |
if (row != null) {
|
|
237 |
value = getCellValueAsString(row.getCell(colIndex))
|
218 |
238 |
|
219 |
|
if (value ==~ /(?s)^[A-Z]{3}: [^;\n]+? +[;\n].*/) {
|
220 |
|
value.findAll( /(?s)[A-Z]{3}: ([^;\n]+?) +[;\n]/ ).each { desc ->
|
221 |
|
writer.writeStartElement("descripteur")
|
222 |
|
matches = (desc =~ /(?s)([A-Z]{3}): ([^;\n]+?) +[;\n]/)
|
223 |
|
writer.writeAttribute("type", matches[0][1])
|
224 |
|
writer.writeCharacters(matches[0][2])
|
225 |
|
writer.writeEndElement() // descripteur
|
|
239 |
if (value ==~ /(?s)^[A-Z]{3}: [^;\n]+? +[;\n].*/) {
|
|
240 |
value.findAll( /(?s)[A-Z]{3}: ([^;\n]+?) +[;\n]/ ).each { desc ->
|
|
241 |
writer.writeStartElement("descripteur")
|
|
242 |
matches = (desc =~ /(?s)([A-Z]{3}): ([^;\n]+?) +[;\n]/)
|
|
243 |
writer.writeAttribute("type", matches[0][1])
|
|
244 |
writer.writeCharacters(matches[0][2])
|
|
245 |
writer.writeEndElement() // descripteur
|
|
246 |
}
|
|
247 |
} else {
|
|
248 |
writer.writeCharacters(value)
|
226 |
249 |
}
|
227 |
|
} else {
|
228 |
|
writer.writeCharacters(value)
|
|
250 |
writer.writeEndElement() // p
|
|
251 |
writer.writeEndElement() // textColumn
|
|
252 |
writer.writeCharacters("\n")
|
229 |
253 |
}
|
230 |
|
writer.writeEndElement() // p
|
231 |
|
writer.writeEndElement() // textColumn
|
232 |
|
writer.writeCharacters("\n")
|
233 |
254 |
}
|
234 |
255 |
|
235 |
256 |
writer.writeCharacters(" ")
|
... | ... | |
239 |
260 |
|
240 |
261 |
writer.writeEndElement() // rootTag
|
241 |
262 |
writer.writeCharacters("\n")
|
|
263 |
// Close the optional TEI envelope: end the text element, then the TEI element, each
// followed by a line break. Guarded by the same EmbedInTEI flag as the statements
// that open these elements, so start and end calls stay paired.
if (EmbedInTEI) {
|
|
264 |
writer.writeEndElement() // text
|
|
265 |
writer.writeCharacters("\n")
|
|
266 |
writer.writeEndElement() // TEI
|
|
267 |
writer.writeCharacters("\n")
|
|
268 |
}
|
242 |
269 |
writer.close()
|
243 |
270 |
output.close()
|
244 |
271 |
println "Result file: $outputFile"
|