Révision 1543
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/conversion/EuroPressToXML2018Macro.groovy (revision 1543) | ||
---|---|---|
1 |
package org.txm.macro.conversion |
|
2 |
// STANDARD DECLARATIONS |
|
3 |
|
|
4 |
import groovy.xml.QName |
|
5 |
import java.text.DecimalFormat |
|
6 |
import org.txm.importer.DomUtils |
|
7 |
import org.txm.importer.ValidateXml |
|
8 |
import org.w3c.tidy.Tidy |
|
9 |
import groovy.util.XmlParser |
|
10 |
import org.kohsuke.args4j.* |
|
11 |
import groovy.transform.Field |
|
12 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
13 |
import org.jsoup.Jsoup |
|
14 |
import org.jsoup.nodes.Document.OutputSettings.Syntax |
|
15 |
|
|
16 |
// BEGINNING OF PARAMETERS |
|
17 |
|
|
18 |
@Field @Option(name="inputDir", usage="The directory containing the html files, to export from the Europress portal", widget="Folder", required=true, def="") |
|
19 |
def inputDir |
|
20 |
|
|
21 |
@Field @Option(name="inputEncoding", usage="character encoding used in the HTML exported files", widget="String", required=false, def="iso-8859-1") |
|
22 |
String inputEncoding |
|
23 |
|
|
24 |
@Field @Option(name="outputDir", usage="The directory containing the result files, to import with the XTZ+CSV import module into TXM", widget="Folder", required=true, def="") |
|
25 |
def outputDir |
|
26 |
|
|
27 |
@Field @Option(name="corpusName", usage="corpus name", widget="String", required=true, def="") |
|
28 |
String corpusName |
|
29 |
|
|
30 |
@Field @Option(name="columnSeparator",usage="", widget="String", required=false, def=",") |
|
31 |
def columnSeparator |
|
32 |
|
|
33 |
@Field @Option(name="txtSeparator",usage="", widget="String", required=false, def="\"") |
|
34 |
def txtSeparator |
|
35 |
|
|
36 |
@Field @Option(name="debug", usage="show debug messages and keep temporary results", widget="Boolean", required=false, def="false") |
|
37 |
def debug |
|
38 |
|
|
39 |
// Open the parameters input dialog box |
|
40 |
if (!ParametersDialog.open(this)) return |
|
41 |
|
|
42 |
// END OF PARAMETERS |
|
43 |
|
|
44 |
if (!inputDir.exists()) { |
|
45 |
println "** inputDir does not exist: $inputDir, aborting." |
|
46 |
return false |
|
47 |
} |
|
48 |
|
|
49 |
xslposttokContent = """<?xml version="1.0"?> |
|
50 |
<xsl:stylesheet xmlns:edate="http://exslt.org/dates-and-times" |
|
51 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:tei="http://www.tei-c.org/ns/1.0" |
|
52 |
xmlns:txm="http://textometrie.org/ns/1.0" |
|
53 |
exclude-result-prefixes="tei edate" xpath-default-namespace="http://www.tei-c.org/ns/1.0" version="2.0"> |
|
54 |
|
|
55 |
<!-- |
|
56 |
This software is dual-licensed: |
|
57 |
|
|
58 |
1. Distributed under a Creative Commons Attribution-ShareAlike 3.0 |
|
59 |
Unported License http://creativecommons.org/licenses/by-sa/3.0/ |
|
60 |
|
|
61 |
2. http://www.opensource.org/licenses/BSD-2-Clause |
|
62 |
|
|
63 |
All rights reserved. |
|
64 |
|
|
65 |
Redistribution and use in source and binary forms, with or without |
|
66 |
modification, are permitted provided that the following conditions are |
|
67 |
met: |
|
68 |
|
|
69 |
* Redistributions of source code must retain the above copyright |
|
70 |
notice, this list of conditions and the following disclaimer. |
|
71 |
|
|
72 |
* Redistributions in binary form must reproduce the above copyright |
|
73 |
notice, this list of conditions and the following disclaimer in the |
|
74 |
documentation and/or other materials provided with the distribution. |
|
75 |
|
|
76 |
This software is provided by the copyright holders and contributors |
|
77 |
"as is" and any express or implied warranties, including, but not |
|
78 |
limited to, the implied warranties of merchantability and fitness for |
|
79 |
a particular purpose are disclaimed. In no event shall the copyright |
|
80 |
holder or contributors be liable for any direct, indirect, incidental, |
|
81 |
special, exemplary, or consequential damages (including, but not |
|
82 |
limited to, procurement of substitute goods or services; loss of use, |
|
83 |
data, or profits; or business interruption) however caused and on any |
|
84 |
theory of liability, whether in contract, strict liability, or tort |
|
85 |
(including negligence or otherwise) arising in any way out of the use |
|
86 |
of this software, even if advised of the possibility of such damage. |
|
87 |
|
|
88 |
|
|
89 |
This stylesheet adds a ref attribute to w elements that will be used for |
|
90 |
references in TXM concordances. Can be used with TXM XTZ import module. |
|
91 |
|
|
92 |
w ref is composed of : |
|
93 |
- docpublicationname |
|
94 |
- date |
|
95 |
|
|
96 |
Written by Alexei Lavrentiev, UMR 5317 IHRIM, 2017 |
|
97 |
Serge Heiden, UMR 5317 IHRIM, 2018 |
|
98 |
--> |
|
99 |
|
|
100 |
|
|
101 |
<xsl:output method="xml" encoding="utf-8" omit-xml-declaration="no"/> |
|
102 |
|
|
103 |
|
|
104 |
<!-- General patterns: all elements, attributes, comments and processing instructions are copied --> |
|
105 |
|
|
106 |
<xsl:template match="*"> |
|
107 |
<xsl:copy> |
|
108 |
<xsl:apply-templates select="*|@*|processing-instruction()|comment()|text()"/> |
|
109 |
</xsl:copy> |
|
110 |
</xsl:template> |
|
111 |
|
|
112 |
<xsl:template match="*" mode="position"><xsl:value-of select="count(preceding-sibling::*)"/></xsl:template> |
|
113 |
|
|
114 |
<xsl:template match="@*|comment()|processing-instruction()"> |
|
115 |
<xsl:copy/> |
|
116 |
</xsl:template> |
|
117 |
|
|
118 |
<xsl:template match="*:w"> |
|
119 |
<xsl:variable name="ref"> |
|
120 |
|
|
121 |
<xsl:choose> |
|
122 |
<xsl:when test="ancestor::*:text[1]/@ref"> |
|
123 |
<!-- <xsl:text>ref: </xsl:text> --> |
|
124 |
<xsl:value-of select="ancestor::*:text[1]/@ref"/> |
|
125 |
</xsl:when> |
|
126 |
<xsl:otherwise> |
|
127 |
<!-- <xsl:text>[NO docpublicationname]</xsl:text> --> |
|
128 |
</xsl:otherwise> |
|
129 |
</xsl:choose> |
|
130 |
<xsl:choose> |
|
131 |
<xsl:when test="ancestor::*:text[1]/@docpublicationname"> |
|
132 |
<!-- <xsl:text>docpublicationname: </xsl:text> --> |
|
133 |
<xsl:value-of select="ancestor::*:text[1]/@docpublicationname"/> |
|
134 |
</xsl:when> |
|
135 |
<xsl:otherwise> |
|
136 |
<!-- <xsl:text>[NO docpublicationname]</xsl:text> --> |
|
137 |
</xsl:otherwise> |
|
138 |
</xsl:choose> |
|
139 |
<xsl:choose> |
|
140 |
<xsl:when test="ancestor::*:text[1]/@date"> |
|
141 |
<!-- <xsl:text>date: </xsl:text> --> |
|
142 |
<xsl:value-of select="ancestor::*:text[1]/@date"/> |
|
143 |
</xsl:when> |
|
144 |
<xsl:otherwise> |
|
145 |
<!-- <xsl:text>[NO date]</xsl:text> --> |
|
146 |
</xsl:otherwise> |
|
147 |
</xsl:choose> |
|
148 |
|
|
149 |
<!-- |
|
150 |
<xsl:if test="ancestor::*:text/@*:id and preceding::*:pb[1]/@n"> |
|
151 |
<xsl:text>, </xsl:text> |
|
152 |
</xsl:if> |
|
153 |
--> |
|
154 |
<xsl:if test="ancestor::*:p[1]/@n"> |
|
155 |
<xsl:text>§ </xsl:text> |
|
156 |
<xsl:value-of select="ancestor::*:p[1]/@n"/> |
|
157 |
</xsl:if> |
|
158 |
<!-- |
|
159 |
<xsl:if test="preceding::*:pb[1]/@n"> |
|
160 |
<xsl:text>p. </xsl:text> |
|
161 |
<xsl:value-of select="preceding::*:pb[1]/@n"/> |
|
162 |
</xsl:if> <xsl:if test="(ancestor::*:text/@*:id or preceding::*:pb[1]/@n) and preceding::*:lb[1]/@n"> |
|
163 |
<xsl:text>, </xsl:text> |
|
164 |
</xsl:if> |
|
165 |
<xsl:if test="preceding::*:lb[1]/@n"> |
|
166 |
<xsl:text>l. </xsl:text> |
|
167 |
<xsl:value-of select="preceding::*:lb[1]/@n"/> |
|
168 |
</xsl:if> |
|
169 |
--> |
|
170 |
</xsl:variable> |
|
171 |
<xsl:copy> |
|
172 |
<xsl:apply-templates select="@*"/> |
|
173 |
<xsl:attribute name="ref"><xsl:value-of select="\$ref"/></xsl:attribute> |
|
174 |
<xsl:apply-templates select="*|processing-instruction()|comment()|text()"/> |
|
175 |
</xsl:copy> |
|
176 |
</xsl:template> |
|
177 |
|
|
178 |
</xsl:stylesheet> |
|
179 |
""" |
|
180 |
|
|
181 |
cssContent = """/* |
|
182 |
Copyright © 2017 ENS de Lyon, CNRS, University of Franche-Comté |
|
183 |
Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
184 |
@author cbourdot |
|
185 |
@author sheiden |
|
186 |
|
|
187 |
TXM default CSS 06-2017 |
|
188 |
|
|
189 |
*/ |
|
190 |
|
|
191 |
.txmeditionpage { |
|
192 |
font-size: 14px; |
|
193 |
text-indent: none; |
|
194 |
text-align: justify; |
|
195 |
box-shadow: .3125em .3125em .625em 0 #888; |
|
196 |
margin: 1.25em auto; |
|
197 |
padding: 1.25em; |
|
198 |
width: 400px; |
|
199 |
min-height: 90%; |
|
200 |
} |
|
201 |
|
|
202 |
.txmeditionpb { |
|
203 |
text-align: center; |
|
204 |
} |
|
205 |
|
|
206 |
.txmeditionpb::before { |
|
207 |
content: "- "; |
|
208 |
} |
|
209 |
|
|
210 |
.txmeditionpb::after { |
|
211 |
content: " -"; |
|
212 |
} |
|
213 |
|
|
214 |
.txmlettrinep:first-letter { |
|
215 |
float: left; |
|
216 |
font-size: 6em; |
|
217 |
line-height: 1; |
|
218 |
margin-right: 0.2em; |
|
219 |
} |
|
220 |
|
|
221 |
a { |
|
222 |
color:#802520; |
|
223 |
} |
|
224 |
|
|
225 |
h1 { |
|
226 |
font-size: 20px; |
|
227 |
font-variant: small-caps; |
|
228 |
text-align: center; |
|
229 |
color:#802520; |
|
230 |
} |
|
231 |
|
|
232 |
h2 { |
|
233 |
font-size: 18px; |
|
234 |
font-variant: small-caps; |
|
235 |
text-align: center; |
|
236 |
color:#802520; |
|
237 |
} |
|
238 |
|
|
239 |
h3 { |
|
240 |
font-size: 16px; |
|
241 |
font-variant: small-caps; |
|
242 |
text-align: center; |
|
243 |
color:#802520; |
|
244 |
} |
|
245 |
|
|
246 |
p { |
|
247 |
text-indent: 0.2cm; |
|
248 |
text-align: justify; |
|
249 |
text-justify: inter-word; |
|
250 |
} |
|
251 |
|
|
252 |
img { |
|
253 |
margin: 10px 10px 10px 10px; |
|
254 |
} |
|
255 |
|
|
256 |
td[rend="table-cell-align-right"] { |
|
257 |
text-align: right; |
|
258 |
} |
|
259 |
|
|
260 |
td[rend="table-cell-align-left"] { |
|
261 |
text-align: left; |
|
262 |
} |
|
263 |
|
|
264 |
td[rend="table-cell-align-center"] { |
|
265 |
text-align: center; |
|
266 |
} |
|
267 |
""" |
|
268 |
|
|
269 |
outputDir.deleteDir() |
|
270 |
outputDir.mkdir() |
|
271 |
outputDir = new File(outputDir, corpusName) |
|
272 |
outputDir.deleteDir() |
|
273 |
outputDir.mkdir() |
|
274 |
|
|
275 |
tmpDir = new File(outputDir, "tmp") |
|
276 |
tmpDir.deleteDir() |
|
277 |
tmpDir.mkdir() |
|
278 |
|
|
279 |
tmpXhtmlOutput = new File(outputDir, "xhtml") |
|
280 |
tmpXhtmlOutput.deleteDir() |
|
281 |
tmpXhtmlOutput.mkdir() |
|
282 |
|
|
283 |
duplicates = new File(outputDir, "duplicates") |
|
284 |
duplicates.deleteDir() |
|
285 |
duplicates.mkdir() |
|
286 |
|
|
287 |
xslDir = new File(outputDir, "xsl") |
|
288 |
xslDir.mkdir() |
|
289 |
xslposttokDir = new File(xslDir, "3-posttok") |
|
290 |
xslposttokDir.mkdir() |
|
291 |
xslposttokFile = new File(xslposttokDir, "txm-posttok-addRef-ref.xsl") |
|
292 |
cssDir = new File(outputDir, "css") |
|
293 |
cssDir.mkdir() |
|
294 |
cssFile = new File(cssDir, corpusName+".css") |
|
295 |
|
|
296 |
// Both contents hold non-ASCII characters (e.g. "§" in the stylesheet, "©" in the CSS header).
// They are written explicitly as UTF-8 instead of with the platform default charset used by
// the << operator. The corpus directory has just been recreated, so overwriting the files is
// equivalent to appending to them.
xslposttokFile.write(xslposttokContent, "UTF-8")
cssFile.write(cssContent, "UTF-8")
|
298 |
|
|
299 |
metadataFile = new File(outputDir, "metadata.csv") |
|
300 |
metadataWriter = metadataFile.newWriter("UTF-8") |
|
301 |
|
|
302 |
int itext = 0 |
|
303 |
def formater = new DecimalFormat("0000") |
|
304 |
|
|
305 |
// HTML elements containing metadata content, with @class=metadataKeys |
|
306 |
def metadataKeys = ["DocPublicationName", "DocHeader", "titreArticle"] |
|
307 |
|
|
308 |
// HTML elements containing text content, with @class=textClass |
|
309 |
def textClass = "docOcurrContainer" |
|
310 |
|
|
311 |
// write metadata header |
|
312 |
metadataWriter.print "id" |
|
313 |
metadataKeys.each { metadataWriter.print columnSeparator+it.toLowerCase() } |
|
314 |
// DocHeader substrings |
|
315 |
metadataWriter.print columnSeparator+"rubrique" |
|
316 |
metadataWriter.print columnSeparator+"date" |
|
317 |
metadataWriter.print columnSeparator+"words" |
|
318 |
metadataWriter.print columnSeparator+"pages" |
|
319 |
metadataWriter.print columnSeparator+"textorder" // date |
|
320 |
metadataWriter.println "" |
|
321 |
|
|
322 |
def files = [] |
|
323 |
inputDir.eachFileMatch(~/.*\.(html|HTML)/){ htmlFile -> files << htmlFile} |
|
324 |
files = files.sort() |
|
325 |
|
|
326 |
def done = new HashSet<String>() |
|
327 |
def ignored = [] |
|
328 |
def allTitles = new HashSet() |
|
329 |
def dones = [:] |
|
330 |
def ignoreds = [] |
|
331 |
|
|
332 |
// scan node for text content |
|
333 |
/**
 * Recursively collects the text content of a parsed XML node.
 *
 * @param node a String, an object exposing children() (e.g. a groovy.util.Node), or null
 * @return null when node is null; otherwise the concatenated text of the node and of its
 *         descendants, with line breaks replaced by spaces, trimmed and prefixed with a
 *         single space
 */
def getText(def node) {
	// a looked-up element may be absent: report it as null so the caller can pick its own default
	if (node == null) return null

	String s = " "
	if (node instanceof String) {
		s += " "+node
	} else {
		for (def c : node.children())
			s += " "+getText(c)
	}
	return " "+s.replace("\n", " ").trim()
}
|
345 |
|
|
346 |
println files.size()+" files to process." |
|
347 |
println "Creating $metadataFile" |
|
348 |
|
|
349 |
for (File htmlFile : files) { |
|
350 |
println "Processing $htmlFile" |
|
351 |
|
|
352 |
String name = htmlFile.getName() |
|
353 |
name = name.substring(0, name.lastIndexOf(".")) |
|
354 |
|
|
355 |
File xhtmlFile = new File(tmpXhtmlOutput, name+".xhtml") |
|
356 |
|
|
357 |
|
|
358 |
if (inputEncoding.size() > 0) { |
|
359 |
doc = Jsoup.parse(htmlFile, inputEncoding, "") |
|
360 |
} else{ |
|
361 |
doc = Jsoup.parse(htmlFile, "UTF8") |
|
362 |
} |
|
363 |
doc.outputSettings().escapeMode(org.jsoup.nodes.Entities.EscapeMode.xhtml) |
|
364 |
doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml) |
|
365 |
|
|
366 |
xhtmlFile.withWriter("UTF-8") { out -> |
|
367 |
out.print doc.html().replace("\"=\"\"", "") |
|
368 |
} |
|
369 |
|
|
370 |
if (!ValidateXml.test(xhtmlFile)) { |
|
371 |
println "Error: $xhtmlFile is malformed." |
|
372 |
continue |
|
373 |
} |
|
374 |
|
|
375 |
def root = new XmlParser(false, true, true).parse(xhtmlFile) |
|
376 |
|
|
377 |
// one <article> per text |
|
378 |
for (def article : root.body.article) { |
|
379 |
def textMetadata = [:] |
|
380 |
|
|
381 |
for (def key : metadataKeys) { |
|
382 |
def values = article.'**'.find { node -> node instanceof groovy.util.Node && node["@class"] == key } |
|
383 |
textMetadata[key] = values |
|
384 |
} |
|
385 |
|
|
386 |
def textContent = article.'**'.find { node -> node instanceof groovy.util.Node && node["@class"] == textClass } |
|
387 |
|
|
388 |
String sign = getText(textMetadata["titreArticle"]) // identify a text |
|
389 |
|
|
390 |
// build text id
itext++
String xmlFileName = name+"_"+formater.format(itext)+".xml"
File xmlFile
if (allTitles.contains(sign)) {
	// a text with the same title has already been written: store this one apart as a duplicate
	ignored << sign
	xmlFile = new File(duplicates, xmlFileName)
	ignoreds << xmlFile.getName()
} else {
	xmlFile = new File(outputDir, xmlFileName)
	allTitles.add(sign)
	// remember which file was kept for this title, it is reported as "keeped=" in ignored.txt
	dones[sign] = xmlFile.getName()
}
|
399 |
|
|
400 |
textId = name+"_"+formater.format(itext) |
|
401 |
|
|
402 |
def rubrique |
|
403 |
def date |
|
404 |
def words |
|
405 |
def pages |
|
406 |
def textorder |
|
407 |
def ref |
|
408 |
|
|
409 |
// write metadata |
|
410 |
metadataWriter.print "$textId" |
|
411 |
for (def k : textMetadata.keySet()) { |
|
412 |
value = getText(textMetadata[k]) |
|
413 |
if (value == null) value = "N/A" |
|
414 |
def potentialDate |
|
415 |
if (k == "DocHeader") { // rubrique, date, words, pages
	// raw French date as found in the header, null when no date could be located
	def rawDate = null

	// complete header: "<rubrique><weekday day month year> - <n> mots, p. <pages>"
	def fullHeader = (value =~ /^(.*)((lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche) [0-9][0-9]? (janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre) [0-9]{4}) - ([0-9]+) mots, p\. (.*)$/)
	if (fullHeader.size() == 1) {
		rubrique = fullHeader[0][1].trim()
		rawDate = fullHeader[0][2]
		words = fullHeader[0][5]
		pages = fullHeader[0][6]
	} else {
		// fallback: only look for a date anywhere in the header
		rubrique = "NA"
		words = "NA"
		pages = "NA"
		def dateOnly = (value =~ /^(.*)((lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche) ([0-9][0-9]?) (janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre) ([0-9]{4}))(.*)$/)
		if (dateOnly.size() == 1) rawDate = dateOnly[0][2]
	}

	// convert the French long date to yyyy-MM-dd; an unparsable date is reported and
	// replaced with "NA" in both cases instead of aborting the whole conversion
	date = "NA"
	if (rawDate != null) {
		try {
			def frenchFormat = java.text.DateFormat.getDateInstance(java.text.DateFormat.FULL, java.util.Locale.FRANCE)
			potentialDate = frenchFormat.parse(rawDate)
			date = new java.text.SimpleDateFormat("yyyy-MM-dd").format(potentialDate)
		} catch (Exception e) {
			println "can't parse date: '$rawDate'"
		}
	}
	textorder = date
} else if (k == "DocPublicationName") ref = value.trim()
|
457 |
|
|
458 |
metadataWriter.print columnSeparator+txtSeparator+value.replaceAll("\n", "").trim().replaceAll(txtSeparator,txtSeparator+txtSeparator)+txtSeparator |
|
459 |
} |
|
460 |
ref = ref+", "+date |
|
461 |
metadataWriter.print columnSeparator+txtSeparator+rubrique.replaceAll(txtSeparator,txtSeparator+txtSeparator)+txtSeparator |
|
462 |
metadataWriter.print columnSeparator+txtSeparator+date.replaceAll(txtSeparator,txtSeparator+txtSeparator)+txtSeparator |
|
463 |
metadataWriter.print columnSeparator+txtSeparator+words+txtSeparator |
|
464 |
metadataWriter.print columnSeparator+txtSeparator+pages+txtSeparator |
|
465 |
metadataWriter.print columnSeparator+txtSeparator+textorder+txtSeparator |
|
466 |
metadataWriter.println "" |
|
467 |
|
|
468 |
// write content |
|
469 |
def writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(xmlFile) , "UTF-8")) |
|
470 |
writer.println "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" |
|
471 |
textContent.name = "text" // set root tag to "text" |
|
472 |
textContent["@id"] = textId // set the text id |
|
473 |
textContent["@ref"] = ref |
|
474 |
|
|
475 |
// write XML file |
|
476 |
new XmlNodePrinter(writer).print(textContent) |
|
477 |
writer.close() |
|
478 |
} |
|
479 |
|
|
480 |
// File xmlFile = |
|
481 |
//println textMetadata.size() |
|
482 |
} |
|
483 |
|
|
484 |
metadataWriter.close() |
|
485 |
|
|
486 |
if (ignored.size() > 0) { |
|
487 |
File ignoredFile = new File (duplicates, "ignored.txt") |
|
488 |
ignoredFile.withWriter("UTF-8") { writer -> |
|
489 |
writer.println "TOTAL: "+ignored.size() |
|
490 |
for (int i = 0 ; i < ignored.size() ; i++) { |
|
491 |
def sign = ignored[i] |
|
492 |
writer.println "\n**DUPLICATE\n " |
|
493 |
writer.println "keeped="+dones[sign] |
|
494 |
writer.println "duplicates="+ignoreds[i] |
|
495 |
writer.println "SIGN="+sign |
|
496 |
writer.println "\n" |
|
497 |
} |
|
498 |
} |
|
499 |
println "TOTAL IGNORED: "+ignored.size() |
|
500 |
println " see $ignoredFile for text IDs" |
|
501 |
} |
|
502 |
|
|
503 |
println "$itext articles found." |
|
504 |
// The "debug" parameter is documented as "show debug messages and keep temporary results":
// the temporary directories are therefore removed only when debug is NOT set.
if (!debug) {
	tmpXhtmlOutput.deleteDir()
	tmpDir.deleteDir()
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/debug/PreferencesMacro.groovy (revision 1543) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.debug |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcp.swt.widget.parameters.* |
|
7 |
import org.txm.core.preferences.TXMPreferences |
|
8 |
import org.txm.searchengine.cqp.CQPPreferences |
|
9 |
|
|
10 |
//org.txm.core.preferences.TXMPreferences.dump(); |
|
11 |
|
|
12 |
println TXMPreferences.getString(CQPPreferences.CQI_SERVER_PATH_TO_CQPLIB, CQPPreferences.PREFERENCES_NODE); |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/PlotEllipsesMacro.groovy (revision 1543) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macroproto |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
|
|
6 |
import groovy.transform.Field |
|
7 |
|
|
8 |
import org.txm.ca.core.functions.CA |
|
9 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
10 |
import org.txm.statsengine.r.core.RWorkspace |
|
11 |
|
|
12 |
// BEGINNING OF PARAMETERS |
|
13 |
|
|
14 |
if (!(corpusViewSelection instanceof CA)) { |
|
15 |
println "Selection is not a CA. Please select a CA result in the Corpus view" |
|
16 |
return; |
|
17 |
} |
|
18 |
|
|
19 |
@Field @Option(name="outputFile", usage="an example file", widget="FileSave", required=true, def="file.svg") |
|
20 |
def outputFile |
|
21 |
|
|
22 |
@Field @Option(name="draw", usage="'row' or 'col'", widget="String", required=true, def="row") |
|
23 |
def draw |
|
24 |
// Open the parameters input dialog box |
|
25 |
if (!ParametersDialog.open(this)) return; |
|
26 |
|
|
27 |
// END OF PARAMETERS |
|
28 |
def ca = corpusViewSelection |
|
29 |
def s = ca.getSymbol() |
|
30 |
def RW = RWorkspace.getRWorkspaceInstance() |
|
31 |
|
|
32 |
def script = """ |
|
33 |
plot($s); |
|
34 |
ellipseCA($s, ellipse=c("$draw")); |
|
35 |
""" |
|
36 |
|
|
37 |
RW.plot(outputFile, script); |
|
38 |
|
|
39 |
println "Done: "+outputFile.getAbsolutePath() |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/stats/PlotEllipsesMacro.groovy (revision 1543) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
7 |
import org.txm.functions.ca.CA |
|
8 |
import org.txm.stat.engine.r.RWorkspace |
|
9 |
// BEGINNING OF PARAMETERS |
|
10 |
|
|
11 |
if (!(corpusViewSelection instanceof CA)) { |
|
12 |
println "selection is not a CA. Please select a CA result in the Corpus view" |
|
13 |
return; |
|
14 |
} |
|
15 |
|
|
16 |
@Field @Option(name="outputFile", usage="an example file", widget="FileSave", required=true, def="file.svg") |
|
17 |
def outputFile |
|
18 |
|
|
19 |
@Field @Option(name="draw", usage="'row' or 'col'", widget="String", required=true, def="row") |
|
20 |
def draw |
|
21 |
// Open the parameters input dialog box |
|
22 |
if (!ParametersDialog.open(this)) return; |
|
23 |
|
|
24 |
// END OF PARAMETERS |
|
25 |
def ca = corpusViewSelection |
|
26 |
def s = ca.getSymbol() |
|
27 |
def RW = RWorkspace.getRWorkspaceInstance() |
|
28 |
|
|
29 |
def script = """ |
|
30 |
plot($s); |
|
31 |
ellipseCA($s, ellipse=c("$draw")); |
|
32 |
""" |
|
33 |
|
|
34 |
RW.plot(outputFile, script); |
|
35 |
|
|
36 |
println "Done: "+outputFile.getAbsolutePath() |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/ApplyXQueryMacro.groovy (revision 1543) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro |
|
3 |
|
|
4 |
|
|
5 |
import org.kohsuke.args4j.* |
|
6 |
import groovy.transform.Field |
|
7 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
8 |
import org.txm.searchengine.cqp.corpus.* |
|
9 |
import net.sf.saxon.* |
|
10 |
import net.sf.saxon.query.* |
|
11 |
import net.sf.saxon.om.* |
|
12 |
import javax.xml.transform.* |
|
13 |
import javax.xml.transform.sax.* |
|
14 |
import javax.xml.transform.stream.* |
|
15 |
import org.xml.sax.* |
|
16 |
import javax.xml.xpath.* |
|
17 |
import net.sf.saxon.event.* |
|
18 |
import org.w3c.dom.* |
|
19 |
import net.sf.saxon.s9api.* |
|
20 |
import javax.xml.parsers.* |
|
21 |
|
|
22 |
// BEGINNING OF PARAMETERS |
|
23 |
|
|
24 |
if (!(corpusViewSelection instanceof Corpus)) { |
|
25 |
println "Error: Selection must be a corpus" |
|
26 |
return false; |
|
27 |
} |
|
28 |
|
|
29 |
@Field @Option(name="xqFile", usage="a Xquery file", widget="FileOpen", required=true, def="C:/Temp/foo.xq") |
|
30 |
def xqFile |
|
31 |
@Field @Option(name="outFile", usage="optional output file", widget="File", required=false, def="") |
|
32 |
def outFile |
|
33 |
@Field @Option(name="debug", usage="an example file", widget="Boolean", required=true, def="false") |
|
34 |
def debug |
|
35 |
|
|
36 |
// Open the parameters input dialog box |
|
37 |
if (!ParametersDialog.open(this)) return; |
|
38 |
|
|
39 |
// END OF PARAMETERS |
|
40 |
|
|
41 |
println "corpora selection: "+corpusViewSelection |
|
42 |
// Only files with the '.xq' extension are accepted as XQuery files
if (!xqFile.getName().endsWith(".xq")) {
	println "Error: Xquery selected file is not a '.xq' file: $xqFile"
	return false;
}
|
46 |
|
|
47 |
MainCorpus mainCorpus = ((Corpus)corpusViewSelection).getMainCorpus(); |
|
48 |
File binDir = mainCorpus.getBaseDirectory(); |
|
49 |
File txmDir = new File(binDir, "txm/"+mainCorpus.getName()); |
|
50 |
|
|
51 |
if (!txmDir.exists()) { |
|
52 |
println "Error: the 'txm' directory does not exist: $txmDir" |
|
53 |
return false; |
|
54 |
} |
|
55 |
|
|
56 |
def xmlFiles = txmDir.listFiles(); |
|
57 |
if (xmlFiles == null || xmlFiles.size() == 0) { |
|
58 |
println "Error: no file found in $txmDir" |
|
59 |
return false; |
|
60 |
} |
|
61 |
|
|
62 |
String query = """<matches> |
|
63 |
{ |
|
64 |
for \$t in fn:collection('$txmDir') |
|
65 |
for \$w in \$t//tei:w |
|
66 |
let \$pos := \$w/txm:ana[@type="#frpos"]/text() |
|
67 |
return <match>{\$w/@id}</match> |
|
68 |
} |
|
69 |
</matches> |
|
70 |
""" |
|
71 |
|
|
72 |
Processor processor = new Processor(false) |
|
73 |
XQueryCompiler xqc = processor.newXQueryCompiler() |
|
74 |
xqc.declareNamespace("tei", "http://www.tei-c.org/ns/1.0") |
|
75 |
xqc.declareNamespace("txm", "http://textometrie.org/1.0") |
|
76 |
xqc.declareNamespace("fn", "http://www.w3.org/2005/xpath-functions") |
|
77 |
XQueryExecutable exp = xqc.compile(query) |
|
78 |
|
|
79 |
DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance(); |
|
80 |
dfactory.setNamespaceAware(true); |
|
81 |
Document dom = dfactory.newDocumentBuilder().newDocument(); |
|
82 |
exp.load().run(new DOMDestination(dom)); |
|
83 |
// Write the result element to the optional output file, or to the console when no file was given
if (outFile instanceof File && outFile.getName().length() > 0) {
	// withWriter closes the writer even if printing the result fails
	outFile.withWriter("UTF-8") { writer ->
		writer.println dom.getDocumentElement()
	}
	println "Result written in "+outFile.getAbsolutePath()
} else {
	println dom.getDocumentElement()
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/XSL2CQPMacro.groovy (revision 1543) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
7 |
import org.txm.searchengine.cqp.corpus.* |
|
8 |
import org.txm.importer.ApplyXsl2; |
|
9 |
import groovy.util.XmlParser |
|
10 |
|
|
11 |
// BEGINNING OF PARAMETERS |
|
12 |
|
|
13 |
if (!(corpusViewSelection instanceof Corpus)) { |
|
14 |
println "Error: Selection must be a corpus" |
|
15 |
return false; |
|
16 |
} |
|
17 |
|
|
18 |
@Field @Option(name="xslFile", usage="an example file", widget="FileOpen", required=true, def="C:/Temp/foo.txt") |
|
19 |
def xslFile |
|
20 |
@Field @Option(name="debug", usage="an example file", widget="Boolean", required=true, def="false") |
|
21 |
def debug |
|
22 |
|
|
23 |
// Open the parameters input dialog box |
|
24 |
if (!ParametersDialog.open(this)) return; |
|
25 |
|
|
26 |
// END OF PARAMETERS |
|
27 |
|
|
28 |
println "corpora selection: "+corpusViewSelection |
|
29 |
if (!xslFile.getName().endsWith(".xsl")) { |
|
30 |
println "Error: XSL selected file is not a '.xsl' file: $xslFile" |
|
31 |
return false; |
|
32 |
} |
|
33 |
|
|
34 |
MainCorpus mainCorpus = ((Corpus)corpusViewSelection).getMainCorpus(); |
|
35 |
File binDir = mainCorpus.getBaseDirectory(); |
|
36 |
File txmDir = new File(binDir, "txm/"+mainCorpus.getName()); |
|
37 |
File resultsDir = new File(binDir, "results"); |
|
38 |
resultsDir.mkdirs() |
|
39 |
|
|
40 |
if (!txmDir.exists()) { |
|
41 |
println "Error: the 'txm' directory does not exist: $txmDir" |
|
42 |
return false; |
|
43 |
} |
|
44 |
|
|
45 |
def xmlFiles = txmDir.listFiles(); |
|
46 |
if (xmlFiles == null || xmlFiles.size() == 0) { |
|
47 |
println "Error: no file found in $txmDir" |
|
48 |
return false; |
|
49 |
} |
|
50 |
|
|
51 |
HashSet<List<String>> allmatches = new HashSet<String>(); |
|
52 |
ApplyXsl2 a = new ApplyXsl2(xslFile.getAbsolutePath()); |
|
53 |
println "Querying..." |
|
54 |
for (File xmlFile : xmlFiles) { |
|
55 |
println " "+xmlFile.getName() |
|
56 |
File resultFile = new File(resultsDir, "xslqueryresult_"+xmlFile.getName()); |
|
57 |
a.process(xmlFile, resultFile); |
|
58 |
|
|
59 |
def matches = new XmlParser().parse(resultFile) |
|
60 |
matches.match.each() { match -> |
|
61 |
def l = []; |
|
62 |
match.wRef.each() { l.add(it.attribute("id")); } |
|
63 |
allmatches << l |
|
64 |
} |
|
65 |
if (!debug) resultFile.delete() |
|
66 |
} |
|
67 |
|
|
68 |
//println "Matches: " |
|
69 |
//for (def m : allmatches) println " "+m |
|
70 |
|
|
71 |
// Build one CQL alternative per match: the first word of the match is anchored by its id,
// the following words are matched by position with empty tokens "[]"
def subqueries = []
for (def m : allmatches) {
	// size() must be called as a method: "m.size" is resolved as a property of each list element
	int n = m.size()
	if (n == 0) continue // a match without any wRef gives nothing to anchor the query on
	def anchor = "[id=\""+m[0]+"\"]"
	if (n == 1) subqueries << anchor
	else if (n == 2) subqueries << anchor+"[]"
	else subqueries << anchor+"[]{"+(n-1)+"}"
}
def query = subqueries.join("|")
def initialquery = query
// Keep the query within 1500 characters by dropping trailing alternatives one at a time.
// A query made of a single alternative cannot be shortened and is left as it is.
while (query.length() > 1500 && query.contains("|")) {
	query = query.substring(0, query.lastIndexOf("|"))
}
if (initialquery != query) println "Warning : query has been truncated: "+initialquery
|
86 |
|
|
87 |
println "CQL: $query" |
|
88 |
if (debug) println "See debug files in: "+resultsDir |
|
89 |
if (initialquery != query) println "Initial CQL: $initialquery" |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/partition/PartsSizeMacro.groovy (revision 1543) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro |
|
3 |
|
|
4 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
5 |
|
|
6 |
// Print one "name<TAB>size" line per part of the partition selected in the Corpus view
def selection = corpusViewSelection
boolean isPartition = selection instanceof Partition
if (!isPartition) {
	println "Select a partition before calling this macro."
	return;
}

Partition partition = selection
def parts = partition.getParts()
for (def current : parts) {
	println current.getName()+"\t"+current.getSize()
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/file/DirectoryInfoMacro.groovy (revision 1543) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import java.nio.file.Path |
|
7 |
import java.nio.file.attribute.FileOwnerAttributeView |
|
8 |
import java.nio.file.attribute.UserPrincipal |
|
9 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
10 |
import org.txm.Toolbox |
|
11 |
import org.txm.searchengine.cqp.* |
|
12 |
import java.io.IOException; |
|
13 |
import java.nio.file.* |
|
14 |
import java.nio.file.attribute.*; |
|
15 |
|
|
16 |
// Parameter declaration - Déclaration du paramètre |
|
17 |
@Field @Option(name="directory", usage="the directory to diagnose", widget="Folder", required=true, def="set da enpos;") |
|
18 |
File directory; |
|
19 |
|
|
20 |
// Parameters settings UI |
|
21 |
// Stop here when the parameters dialog box did not complete
if (!ParametersDialog.open(this)) {
	println("** DirectoryInfoMacro error: Impossible to open Parameters settings UI dialog box.")
	return
}
|
25 |
|
|
26 |
println "full path="+directory.getAbsolutePath() |
|
27 |
println " exists? "+directory.exists() |
|
28 |
println " read? "+directory.canRead() |
|
29 |
println " write? "+directory.canWrite() |
|
30 |
println " executable? "+directory.canExecute() |
|
31 |
println " hidden? "+directory.isHidden() |
|
32 |
def files = directory.listFiles() |
|
33 |
println " number of files? "+(files == null ? "unknown (directory cannot be listed)" : files.size()) |
|
34 |
|
|
35 |
|
|
36 |
Path path = Paths.get(directory.getAbsolutePath()); |
|
37 |
|
|
38 |
FileOwnerAttributeView ownerAttributeView = Files.getFileAttributeView(path, FileOwnerAttributeView.class); |
|
39 |
if (ownerAttributeView != null) { |
|
40 |
UserPrincipal owner = ownerAttributeView.getOwner(); |
|
41 |
if (owner != null) println " file owner attribute: "+owner.getName() |
|
42 |
} |
|
43 |
|
|
44 |
AclFileAttributeView aclAttributeView = Files.getFileAttributeView(path, AclFileAttributeView.class); |
|
45 |
if (aclAttributeView != null) { |
|
46 |
List<AclEntry> acl = aclAttributeView.getAcl(); |
|
47 |
if (acl != null) { |
|
48 |
for (AclEntry entry : acl) |
|
49 |
if (entry != null) println " acl entry: "+entry |
|
50 |
} |
|
51 |
} |
|
52 |
|
|
53 |
BasicFileAttributeView basicAttributeView = Files.getFileAttributeView(path, BasicFileAttributeView.class); |
|
54 |
if (basicAttributeView != null) { |
|
55 |
BasicFileAttributes attributes = basicAttributeView.readAttributes() |
|
56 |
if (attributes != null) { |
|
57 |
println " basic attributes: creation time: "+attributes.creationTime() |
|
58 |
println " basic attributes: last access time: "+attributes.lastAccessTime() |
|
59 |
println " basic attributes: last modification time: "+attributes.lastModifiedTime() |
|
60 |
println " basic attributes: file key: "+attributes.fileKey() |
|
61 |
println " basic attributes: directory file?: "+attributes.isDirectory() |
|
62 |
println " basic attributes: symbolic link?: "+attributes.isSymbolicLink() |
|
63 |
println " basic attributes: regular file?: "+attributes.isRegularFile() |
|
64 |
} |
|
65 |
} |
|
66 |
|
|
67 |
DosFileAttributeView dosAttributeView = Files.getFileAttributeView(path, DosFileAttributeView.class); |
|
68 |
if (dosAttributeView != null) { |
|
69 |
DosFileAttributes attributes = dosAttributeView.readAttributes() |
|
70 |
if (attributes != null) { |
|
71 |
println " dos attributes: creation time: "+attributes.creationTime() |
|
72 |
println " dos attributes: last access time: "+attributes.lastAccessTime() |
|
73 |
println " dos attributes: last modification time: "+attributes.lastModifiedTime() |
|
74 |
println " dos attributes: file key: "+attributes.fileKey() |
|
75 |
println " dos attributes: directory file?: "+attributes.isDirectory() |
|
76 |
println " dos attributes: symbolic link?: "+attributes.isSymbolicLink() |
|
77 |
println " dos attributes: regular file?: "+attributes.isRegularFile() |
|
78 |
println " dos attributes: archive file?: "+attributes.isArchive() |
|
79 |
println " dos attributes: system file?: "+attributes.isSystem() |
|
80 |
} |
|
81 |
} |
|
82 |
|
|
83 |
PosixFileAttributeView posixAttributeView = Files.getFileAttributeView(path, PosixFileAttributeView.class); |
|
84 |
if (posixAttributeView != null) { |
|
85 |
PosixFileAttributes attributes = posixAttributeView.readAttributes() |
|
86 |
if (attributes != null) { |
|
87 |
println " posix attributes: group: "+attributes.group() |
|
88 |
println " posix attributes: creation time: "+attributes.creationTime() |
|
89 |
println " posix attributes: last access time: "+attributes.lastAccessTime() |
|
90 |
println " posix attributes: last modification time: "+attributes.lastModifiedTime() |
|
91 |
println " posix attributes: file key: "+attributes.fileKey() |
|
92 |
println " posix attributes: directory file?: "+attributes.isDirectory() |
|
93 |
println " posix attributes: symbolic link?: "+attributes.isSymbolicLink() |
|
94 |
println " posix attributes: regular file?: "+attributes.isRegularFile() |
|
95 |
println " posix attributes: permissions: "+attributes.permissions().sort() |
|
96 |
} |
|
97 |
} |
|
98 |
|
|
99 |
UserDefinedFileAttributeView userdefinedAttributeView = Files.getFileAttributeView(path, UserDefinedFileAttributeView.class); |
|
100 |
if (userdefinedAttributeView != null) { |
|
101 |
def attributes = userdefinedAttributeView.list() |
|
102 |
if (attributes != null) { |
|
103 |
for (def entry : attributes) |
|
104 |
println " user defined attributes: "+entry |
|
105 |
} |
|
106 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/file/SetFileRightsMacro.groovy (revision 1543) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
7 |
|
|
8 |
// BEGINNING OF PARAMETERS |
|
9 |
|
|
10 |
@Field @Option(name="file", usage="an example file", widget="FileOpen", required=true, def="C:/Temp/foo.txt") |
|
11 |
def file |
|
12 |
|
|
13 |
@Field @Option(name="read_right", usage="read", widget="Boolean", required=true, def="true") |
|
14 |
def read_right |
|
15 |
@Field @Option(name="write_right", usage="write", widget="Boolean", required=true, def="true") |
|
16 |
def write_right |
|
17 |
@Field @Option(name="execute_right", usage="execute", widget="Boolean", required=true, def="true") |
|
18 |
def execute_right |
|
19 |
|
|
20 |
@Field @Option(name="current_user_only", usage="apply the rights to the file owner only instead of everybody", widget="Boolean", required=true, def="false") |
|
21 |
def current_user_only |
|
22 |
|
|
23 |
// Open the parameters input dialog box |
|
24 |
if (!ParametersDialog.open(this)) return; |
|
25 |
|
|
26 |
// END OF PARAMETERS |
|
27 |
|
|
28 |
if (file.exists()) { |
|
29 |
// File.setReadable/setWritable/setExecutable report a failure through their boolean result, not an exception |
|
30 |
if (!file.setReadable(read_right, current_user_only)) println "Error: could not set the read right of $file" |
|
31 |
if (!file.setWritable(write_right, current_user_only)) println "Error: could not set the write right of $file" |
|
32 |
if (!file.setExecutable(execute_right, current_user_only)) println "Error: could not set the execute right of $file" |
|
33 |
|
|
34 |
} else { |
|
35 |
println "Error: file not found $file" |
|
36 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/ExecPythonMacro.groovy (revision 1543) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
7 |
|
|
8 |
// BEGINNING OF PARAMETERS |
|
9 |
|
|
10 |
// Declare each parameter here |
|
11 |
// (available widget types: Query, File, Folder, String, Text, Boolean, Integer, Float and Date) |
|
12 |
|
|
13 |
@Field @Option(name="pythonFile", usage="an example file", widget="FileOpen", required=true, def="script.py") |
|
14 |
def pythonFile |
|
15 |
|
|
16 |
// Parameters settings UI |
|
17 |
if (!ParametersDialog.open(this)) { |
|
18 |
println("** ExecPythonMacro error: Impossible to open Parameters settings UI dialog box.") |
|
19 |
return |
|
20 |
} |
|
21 |
|
|
22 |
// Launch the interpreter with the argument-list form of execute() so a script path containing spaces stays one argument |
|
23 |
def process = ["python", pythonFile.toString()].execute() |
|
24 |
process.text.eachLine {println it} |
|
25 |
|
|
26 |
// waitFor() blocks until the process has exited; exitValue() throws IllegalThreadStateException if it is still running |
|
27 |
def exitValue = process.waitFor() |
|
28 |
if (exitValue != 0) println "Error during execution: $exitValue" |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ExecXSLDOMMacro.groovy (revision 1543) | ||
---|---|---|
1 |
package org.txm.macro.xml; |
|
2 |
// STANDARD DECLARATIONS |
|
3 |
|
|
4 |
import org.kohsuke.args4j.* |
|
5 |
import groovy.transform.Field |
|
6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
7 |
import org.txm.importer.ApplyXsl2; |
|
8 |
import javax.xml.transform.stream.* |
|
9 |
import javax.xml.transform.dom.DOMResult |
|
10 |
import org.w3c.dom.* |
|
11 |
|
|
12 |
|
|
13 |
// BEGINNING OF PARAMETERS |
|
14 |
@Field @Option(name="XSLFile", usage="an example file", widget="File", required=true, def="file.xsl") |
|
15 |
def XSLFile = new File(System.getProperty("user.home"),"TXM/xsl/identity.xsl") |
|
16 |
|
|
17 |
@Field @Option(name="intputDirectory", usage="an example folder", widget="Folder", required=true, def="in") |
|
18 |
def intputDirectory = new File(System.getProperty("user.home"),"xml/TESTS2/xml") |
|
19 |
|
|
20 |
//@Field @Option(name="parameters", usage="an example folder", widget="Text", required=false, def="") |
|
21 |
def parameters = [:] |
|
22 |
|
|
23 |
@Field @Option(name="dom", usage="XSLT Result is - true: a DOM Element. false - a XSLT Result is XMLStreamReader", widget="Boolean", required=true, def="true") |
|
24 |
def dom |
|
25 |
|
|
26 |
@Field @Option(name="debug", usage="Show debug messages, value = true|false", widget="Boolean", required=true, def="false") |
|
27 |
def debug |
|
28 |
|
|
29 |
if (!ParametersDialog.open(this)) return; |
|
30 |
// END OF PARAMETERS |
|
31 |
|
|
32 |
// USER MANIPULATIONS |
|
33 |
|
|
34 |
def processDOMResult(File inputXMLFile, def resultnode) { |
|
35 |
// with resultnode a Element : https://docs.oracle.com/javase/8/docs/api/org/w3c/dom/Element.html |
|
36 |
println inputXMLFile.getName()+" -> "+ resultnode.getTagName() |
|
37 |
} |
|
38 |
|
|
39 |
// END USER MANIPULATIONS |
|
40 |
|
|
41 |
println "Use XSL $XSLFile with parameters $parameters" |
|
42 |
println "Processed directory: $intputDirectory" |
|
43 |
|
|
44 |
def files = [] |
|
45 |
ApplyXsl2 a = new ApplyXsl2(XSLFile.getAbsolutePath()); |
|
46 |
intputDirectory.eachFileMatch(~/.+\.(xml|XML)/) { XMLFile -> |
|
47 |
String name = XMLFile.getName() |
|
48 |
try { |
|
49 |
def result = process(a, XMLFile, [:]); |
|
50 |
if (dom) processDOMResult(XMLFile, result.getNode().getDocumentElement()); |
|
51 |
else processSaxResult(XMLFile, result); |
|
52 |
files << XMLFile |
|
53 |
} catch (Exception e) { |
|
54 |
println "Warning: XSL transformation of '$name' failed with error=$e" |
|
55 |
if (debug) e.printStackTrace(); |
|
56 |
} |
|
57 |
} |
|
58 |
|
|
59 |
/** |
|
60 |
 * Transforms one XML file with the transformer held by the given ApplyXsl2 instance. |
|
61 |
 * |
|
62 |
 * @param a the ApplyXsl2 wrapping the stylesheet; cleanMemory() and resetParams() are called on it after the transformation |
|
63 |
 * @param inputXMLFile the XML file to transform |
|
64 |
 * @param args map of XSL parameter names to values, set on 'a' before transforming |
|
65 |
 * @return a DOMResult when the 'dom' option is true, otherwise a StreamResult whose writer is a StringWriter holding the serialized output |
|
66 |
 * @throws Exception if a parameter cannot be set or if the transformation fails |
|
67 |
 */ |
|
68 |
def process(ApplyXsl2 a, File inputXMLFile, def args) throws Exception { |
|
69 |
for (String k : args.keySet()) { |
|
70 |
// NOTE(review): assumes ApplyXsl2.setParam(name, value) returns false on failure, as the original check implied - confirm |
|
71 |
if (!a.setParam(k, args[k])) |
|
72 |
throw new IllegalArgumentException("Cannot set the XSL parameter '"+k+"'"); |
|
73 |
} |
|
74 |
|
|
75 |
def result = null; |
|
76 |
if (dom) result = new DOMResult(); |
|
77 |
else result = new StreamResult(new StringWriter()); // keep the serialized output in memory; read it with result.getWriter().toString() |
|
78 |
a.transformer.transform(new StreamSource(inputXMLFile), result); |
|
79 |
a.cleanMemory(); // save memory |
|
80 |
a.resetParams() |
|
81 |
return result; |
|
82 |
} |
Formats disponibles : Unified diff