Révision 1543
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/conversion/EuroPressToXML2018Macro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
package org.txm.macro.conversion |
|
| 2 |
// STANDARD DECLARATIONS |
|
| 3 |
|
|
| 4 |
import groovy.xml.QName |
|
| 5 |
import java.text.DecimalFormat |
|
| 6 |
import org.txm.importer.DomUtils |
|
| 7 |
import org.txm.importer.ValidateXml |
|
| 8 |
import org.w3c.tidy.Tidy |
|
| 9 |
import groovy.util.XmlParser |
|
| 10 |
import org.kohsuke.args4j.* |
|
| 11 |
import groovy.transform.Field |
|
| 12 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
| 13 |
import org.jsoup.Jsoup |
|
| 14 |
import org.jsoup.nodes.Document.OutputSettings.Syntax |
|
| 15 |
|
|
| 16 |
// BEGINNING OF PARAMETERS |
|
| 17 |
|
|
| 18 |
@Field @Option(name="inputDir", usage="The directory containing the html files, to export from the Europress portal", widget="Folder", required=true, def="") |
|
| 19 |
def inputDir |
|
| 20 |
|
|
| 21 |
@Field @Option(name="inputEncoding", usage="character encoding used in the HTML exported files", widget="String", required=false, def="iso-8859-1") |
|
| 22 |
String inputEncoding |
|
| 23 |
|
|
| 24 |
@Field @Option(name="outputDir", usage="The directory containing the result files, to import with the XTZ+CSV import module into TXM", widget="Folder", required=true, def="") |
|
| 25 |
def outputDir |
|
| 26 |
|
|
| 27 |
@Field @Option(name="corpusName", usage="corpus name", widget="String", required=true, def="") |
|
| 28 |
String corpusName |
|
| 29 |
|
|
| 30 |
@Field @Option(name="columnSeparator",usage="", widget="String", required=false, def=",") |
|
| 31 |
def columnSeparator |
|
| 32 |
|
|
| 33 |
@Field @Option(name="txtSeparator",usage="", widget="String", required=false, def="\"") |
|
| 34 |
def txtSeparator |
|
| 35 |
|
|
| 36 |
@Field @Option(name="debug", usage="show debug messages and keep temporary results", widget="Boolean", required=false, def="false") |
|
| 37 |
def debug |
|
| 38 |
|
|
| 39 |
// Open the parameters input dialog box |
|
| 40 |
if (!ParametersDialog.open(this)) return |
|
| 41 |
|
|
| 42 |
// END OF PARAMETERS |
|
| 43 |
|
|
| 44 |
if (!inputDir.exists()) {
|
|
| 45 |
println "** inputDir does not exist: $inputDir, aborting." |
|
| 46 |
return false |
|
| 47 |
} |
|
| 48 |
|
|
| 49 |
xslposttokContent = """<?xml version="1.0"?> |
|
| 50 |
<xsl:stylesheet xmlns:edate="http://exslt.org/dates-and-times" |
|
| 51 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:tei="http://www.tei-c.org/ns/1.0" |
|
| 52 |
xmlns:txm="http://textometrie.org/ns/1.0" |
|
| 53 |
exclude-result-prefixes="tei edate" xpath-default-namespace="http://www.tei-c.org/ns/1.0" version="2.0"> |
|
| 54 |
|
|
| 55 |
<!-- |
|
| 56 |
This software is dual-licensed: |
|
| 57 |
|
|
| 58 |
1. Distributed under a Creative Commons Attribution-ShareAlike 3.0 |
|
| 59 |
Unported License http://creativecommons.org/licenses/by-sa/3.0/ |
|
| 60 |
|
|
| 61 |
2. http://www.opensource.org/licenses/BSD-2-Clause |
|
| 62 |
|
|
| 63 |
All rights reserved. |
|
| 64 |
|
|
| 65 |
Redistribution and use in source and binary forms, with or without |
|
| 66 |
modification, are permitted provided that the following conditions are |
|
| 67 |
met: |
|
| 68 |
|
|
| 69 |
* Redistributions of source code must retain the above copyright |
|
| 70 |
notice, this list of conditions and the following disclaimer. |
|
| 71 |
|
|
| 72 |
* Redistributions in binary form must reproduce the above copyright |
|
| 73 |
notice, this list of conditions and the following disclaimer in the |
|
| 74 |
documentation and/or other materials provided with the distribution. |
|
| 75 |
|
|
| 76 |
This software is provided by the copyright holders and contributors |
|
| 77 |
"as is" and any express or implied warranties, including, but not |
|
| 78 |
limited to, the implied warranties of merchantability and fitness for |
|
| 79 |
a particular purpose are disclaimed. In no event shall the copyright |
|
| 80 |
holder or contributors be liable for any direct, indirect, incidental, |
|
| 81 |
special, exemplary, or consequential damages (including, but not |
|
| 82 |
limited to, procurement of substitute goods or services; loss of use, |
|
| 83 |
data, or profits; or business interruption) however caused and on any |
|
| 84 |
theory of liability, whether in contract, strict liability, or tort |
|
| 85 |
(including negligence or otherwise) arising in any way out of the use |
|
| 86 |
of this software, even if advised of the possibility of such damage. |
|
| 87 |
|
|
| 88 |
|
|
| 89 |
This stylesheet adds a ref attribute to w elements that will be used for |
|
| 90 |
references in TXM concordances. Can be used with TXM XTZ import module. |
|
| 91 |
|
|
| 92 |
w ref is composed of : |
|
| 93 |
- docpublicationname |
|
| 94 |
- date |
|
| 95 |
|
|
| 96 |
Written by Alexei Lavrentiev, UMR 5317 IHRIM, 2017 |
|
| 97 |
Serge Heiden, UMR 5317 IHRIM, 2018 |
|
| 98 |
--> |
|
| 99 |
|
|
| 100 |
|
|
| 101 |
<xsl:output method="xml" encoding="utf-8" omit-xml-declaration="no"/> |
|
| 102 |
|
|
| 103 |
|
|
| 104 |
<!-- General patterns: all elements, attributes, comments and processing instructions are copied --> |
|
| 105 |
|
|
| 106 |
<xsl:template match="*"> |
|
| 107 |
<xsl:copy> |
|
| 108 |
<xsl:apply-templates select="*|@*|processing-instruction()|comment()|text()"/> |
|
| 109 |
</xsl:copy> |
|
| 110 |
</xsl:template> |
|
| 111 |
|
|
| 112 |
<xsl:template match="*" mode="position"><xsl:value-of select="count(preceding-sibling::*)"/></xsl:template> |
|
| 113 |
|
|
| 114 |
<xsl:template match="@*|comment()|processing-instruction()"> |
|
| 115 |
<xsl:copy/> |
|
| 116 |
</xsl:template> |
|
| 117 |
|
|
| 118 |
<xsl:template match="*:w"> |
|
| 119 |
<xsl:variable name="ref"> |
|
| 120 |
|
|
| 121 |
<xsl:choose> |
|
| 122 |
<xsl:when test="ancestor::*:text[1]/@ref"> |
|
| 123 |
<!-- <xsl:text>ref: </xsl:text> --> |
|
| 124 |
<xsl:value-of select="ancestor::*:text[1]/@ref"/> |
|
| 125 |
</xsl:when> |
|
| 126 |
<xsl:otherwise> |
|
| 127 |
<!-- <xsl:text>[NO docpublicationname]</xsl:text> --> |
|
| 128 |
</xsl:otherwise> |
|
| 129 |
</xsl:choose> |
|
| 130 |
<xsl:choose> |
|
| 131 |
<xsl:when test="ancestor::*:text[1]/@docpublicationname"> |
|
| 132 |
<!-- <xsl:text>docpublicationname: </xsl:text> --> |
|
| 133 |
<xsl:value-of select="ancestor::*:text[1]/@docpublicationname"/> |
|
| 134 |
</xsl:when> |
|
| 135 |
<xsl:otherwise> |
|
| 136 |
<!-- <xsl:text>[NO docpublicationname]</xsl:text> --> |
|
| 137 |
</xsl:otherwise> |
|
| 138 |
</xsl:choose> |
|
| 139 |
<xsl:choose> |
|
| 140 |
<xsl:when test="ancestor::*:text[1]/@date"> |
|
| 141 |
<!-- <xsl:text>date: </xsl:text> --> |
|
| 142 |
<xsl:value-of select="ancestor::*:text[1]/@date"/> |
|
| 143 |
</xsl:when> |
|
| 144 |
<xsl:otherwise> |
|
| 145 |
<!-- <xsl:text>[NO date]</xsl:text> --> |
|
| 146 |
</xsl:otherwise> |
|
| 147 |
</xsl:choose> |
|
| 148 |
|
|
| 149 |
<!-- |
|
| 150 |
<xsl:if test="ancestor::*:text/@*:id and preceding::*:pb[1]/@n"> |
|
| 151 |
<xsl:text>, </xsl:text> |
|
| 152 |
</xsl:if> |
|
| 153 |
--> |
|
| 154 |
<xsl:if test="ancestor::*:p[1]/@n"> |
|
| 155 |
<xsl:text>§ </xsl:text> |
|
| 156 |
<xsl:value-of select="ancestor::*:p[1]/@n"/> |
|
| 157 |
</xsl:if> |
|
| 158 |
<!-- |
|
| 159 |
<xsl:if test="preceding::*:pb[1]/@n"> |
|
| 160 |
<xsl:text>p. </xsl:text> |
|
| 161 |
<xsl:value-of select="preceding::*:pb[1]/@n"/> |
|
| 162 |
</xsl:if> <xsl:if test="(ancestor::*:text/@*:id or preceding::*:pb[1]/@n) and preceding::*:lb[1]/@n"> |
|
| 163 |
<xsl:text>, </xsl:text> |
|
| 164 |
</xsl:if> |
|
| 165 |
<xsl:if test="preceding::*:lb[1]/@n"> |
|
| 166 |
<xsl:text>l. </xsl:text> |
|
| 167 |
<xsl:value-of select="preceding::*:lb[1]/@n"/> |
|
| 168 |
</xsl:if> |
|
| 169 |
--> |
|
| 170 |
</xsl:variable> |
|
| 171 |
<xsl:copy> |
|
| 172 |
<xsl:apply-templates select="@*"/> |
|
| 173 |
<xsl:attribute name="ref"><xsl:value-of select="\$ref"/></xsl:attribute> |
|
| 174 |
<xsl:apply-templates select="*|processing-instruction()|comment()|text()"/> |
|
| 175 |
</xsl:copy> |
|
| 176 |
</xsl:template> |
|
| 177 |
|
|
| 178 |
</xsl:stylesheet> |
|
| 179 |
""" |
|
| 180 |
|
|
| 181 |
cssContent = """/* |
|
| 182 |
Copyright © 2017 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 183 |
Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 184 |
@author cbourdot |
|
| 185 |
@author sheiden |
|
| 186 |
|
|
| 187 |
TXM default CSS 06-2017 |
|
| 188 |
|
|
| 189 |
*/ |
|
| 190 |
|
|
| 191 |
.txmeditionpage {
|
|
| 192 |
font-size: 14px; |
|
| 193 |
text-indent: none; |
|
| 194 |
text-align: justify; |
|
| 195 |
box-shadow: .3125em .3125em .625em 0 #888; |
|
| 196 |
margin: 1.25em auto; |
|
| 197 |
padding: 1.25em; |
|
| 198 |
width: 400px; |
|
| 199 |
min-height: 90%; |
|
| 200 |
} |
|
| 201 |
|
|
| 202 |
.txmeditionpb {
|
|
| 203 |
text-align: center; |
|
| 204 |
} |
|
| 205 |
|
|
| 206 |
.txmeditionpb::before {
|
|
| 207 |
content: "- "; |
|
| 208 |
} |
|
| 209 |
|
|
| 210 |
.txmeditionpb::after {
|
|
| 211 |
content: " -"; |
|
| 212 |
} |
|
| 213 |
|
|
| 214 |
.txmlettrinep:first-letter {
|
|
| 215 |
float: left; |
|
| 216 |
font-size: 6em; |
|
| 217 |
line-height: 1; |
|
| 218 |
margin-right: 0.2em; |
|
| 219 |
} |
|
| 220 |
|
|
| 221 |
a {
|
|
| 222 |
color:#802520; |
|
| 223 |
} |
|
| 224 |
|
|
| 225 |
h1 {
|
|
| 226 |
font-size: 20px; |
|
| 227 |
font-variant: small-caps; |
|
| 228 |
text-align: center; |
|
| 229 |
color:#802520; |
|
| 230 |
} |
|
| 231 |
|
|
| 232 |
h2 {
|
|
| 233 |
font-size: 18px; |
|
| 234 |
font-variant: small-caps; |
|
| 235 |
text-align: center; |
|
| 236 |
color:#802520; |
|
| 237 |
} |
|
| 238 |
|
|
| 239 |
h3 {
|
|
| 240 |
font-size: 16px; |
|
| 241 |
font-variant: small-caps; |
|
| 242 |
text-align: center; |
|
| 243 |
color:#802520; |
|
| 244 |
} |
|
| 245 |
|
|
| 246 |
p {
|
|
| 247 |
text-indent: 0.2cm; |
|
| 248 |
text-align: justify; |
|
| 249 |
text-justify: inter-word; |
|
| 250 |
} |
|
| 251 |
|
|
| 252 |
img {
|
|
| 253 |
margin: 10px 10px 10px 10px; |
|
| 254 |
} |
|
| 255 |
|
|
| 256 |
td[rend="table-cell-align-right"] {
|
|
| 257 |
text-align: right; |
|
| 258 |
} |
|
| 259 |
|
|
| 260 |
td[rend="table-cell-align-left"] {
|
|
| 261 |
text-align: left; |
|
| 262 |
} |
|
| 263 |
|
|
| 264 |
td[rend="table-cell-align-center"] {
|
|
| 265 |
text-align: center; |
|
| 266 |
} |
|
| 267 |
""" |
|
| 268 |
|
|
| 269 |
// Prepare the output tree:
//   <outputDir>/<corpusName>/{tmp, xhtml, duplicates, xsl/3-posttok, css} + metadata.csv
// WARNING: the folder selected as outputDir is wiped before anything is written.
outputDir.deleteDir()
outputDir.mkdirs()
outputDir = new File(outputDir, corpusName)
outputDir.deleteDir()
outputDir.mkdirs()
if (!outputDir.isDirectory()) {
	println "** outputDir could not be created: $outputDir, aborting."
	return false
}

tmpDir = new File(outputDir, "tmp")
tmpXhtmlOutput = new File(outputDir, "xhtml")
duplicates = new File(outputDir, "duplicates")
[tmpDir, tmpXhtmlOutput, duplicates].each { dir ->
	dir.deleteDir()
	dir.mkdirs()
}

xslDir = new File(outputDir, "xsl")
xslposttokDir = new File(xslDir, "3-posttok")
xslposttokDir.mkdirs() // also creates xslDir
xslposttokFile = new File(xslposttokDir, "txm-posttok-addRef-ref.xsl")
cssDir = new File(outputDir, "css")
cssDir.mkdirs()
cssFile = new File(cssDir, corpusName+".css")

// Both resources contain non-ASCII characters; write them explicitly as UTF-8
// (the XSL has no encoding declaration, so XML parsers will read it as UTF-8)
// and overwrite instead of appending, so a stale file is never extended.
xslposttokFile.write(xslposttokContent, "UTF-8")
cssFile.write(cssContent, "UTF-8")

metadataFile = new File(outputDir, "metadata.csv")
metadataWriter = metadataFile.newWriter("UTF-8")
|
|
| 301 |
|
|
| 302 |
// Running article counter and its zero-padded formatter (used to build text ids)
int itext = 0
def formater = new DecimalFormat("0000")

// @class values of the HTML elements holding metadata
def metadataKeys = ["DocPublicationName", "DocHeader", "titreArticle"]

// @class value of the HTML element holding the article body
def textClass = "docOcurrContainer"

// Metadata header row: id, one column per metadata key (lowercased),
// then the columns derived from the DocHeader string
def derivedColumns = ["rubrique", "date", "words", "pages", "textorder"]
def headerCells = ["id"]
for (String key : metadataKeys) {
	headerCells << columnSeparator+key.toLowerCase()
}
for (String column : derivedColumns) {
	headerCells << columnSeparator+column
}
metadataWriter.println headerCells.join("")

// Input HTML files, in sorted order
def files = []
inputDir.eachFileMatch(~/.*\.(html|HTML)/) { files.add(it) }
files.sort()

// Bookkeeping for duplicate detection and reporting
def done = new HashSet<String>()
def allTitles = new HashSet()
def dones = [:]
def ignored = []
def ignoreds = []
|
| 331 |
|
|
| 332 |
/**
 * Recursively collects the text content of a parsed XML node.
 *
 * @param node a String, or an object exposing children() (e.g. a groovy.util.Node); may be null
 * @return null when node is null (so callers can substitute a default value);
 *         otherwise a single space followed by the concatenated text, with newlines
 *         replaced by spaces and outer whitespace trimmed
 */
def getText(def node) {
	// a metadata element that was not found in the article is passed in as null
	if (node == null) return null

	String s = " "
	if (node instanceof String) {
		s += " "+node
	} else {
		for (def c : node.children())
			s += " "+getText(c)
	}
	return " "+s.replace("\n", " ").trim()
}
|
| 345 |
|
|
| 346 |
println files.size()+" files to process."
println "Creating $metadataFile"

// DocHeader layouts: "<rubrique> <weekday d month yyyy> - <n> mots, p. <pages>" or date only
def fullHeaderPattern = /^(.*)((lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche) [0-9][0-9]? (janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre) [0-9]{4}) - ([0-9]+) mots, p\. (.*)$/
def dateOnlyPattern = /^(.*)((lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche) ([0-9][0-9]?) (janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre) ([0-9]{4}))(.*)$/

// Converts a French long date ("lundi 1 janvier 2018") to yyyy-MM-dd, or "NA" when unparsable
def toIsoDate = { String frenchDate ->
	try {
		def parsed = java.text.DateFormat.getDateInstance(java.text.DateFormat.FULL, java.util.Locale.FRANCE).parse(frenchDate)
		return new java.text.SimpleDateFormat("yyyy-MM-dd").format(parsed)
	} catch (java.text.ParseException e) {
		println "can't parse date: '$frenchDate'"
		return "NA"
	}
}

// Doubles the text separator inside a cell (literal replacement, the separator is not a regex)
def escapeCell = { String cell -> cell.replace(txtSeparator, txtSeparator+txtSeparator) }

// Formats one CSV cell, preceded by the column separator
def csvCell = { cell -> columnSeparator+txtSeparator+cell+txtSeparator }

for (File htmlFile : files) {
	println "Processing $htmlFile"

	String name = htmlFile.getName()
	name = name.substring(0, name.lastIndexOf("."))

	File xhtmlFile = new File(tmpXhtmlOutput, name+".xhtml")

	// 1) HTML -> well-formed XHTML with jsoup
	def doc
	if (inputEncoding) { // neither null nor empty
		doc = Jsoup.parse(htmlFile, inputEncoding, "")
	} else {
		doc = Jsoup.parse(htmlFile, "UTF8")
	}
	doc.outputSettings().escapeMode(org.jsoup.nodes.Entities.EscapeMode.xhtml)
	doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml)

	xhtmlFile.withWriter("UTF-8") { out ->
		// strip the ="" residue that would otherwise make the XHTML malformed
		out.print doc.html().replace("\"=\"\"", "")
	}

	if (!ValidateXml.test(xhtmlFile)) {
		println "Error: $xhtmlFile is malformed."
		continue
	}

	def root = new XmlParser(false, true, true).parse(xhtmlFile)

	// 2) one <article> per text
	for (def article : root.body.article) {
		def textMetadata = [:]
		for (def key : metadataKeys) {
			textMetadata[key] = article.'**'.find { node -> node instanceof groovy.util.Node && node["@class"] == key }
		}

		def textContent = article.'**'.find { node -> node instanceof groovy.util.Node && node["@class"] == textClass }
		if (textContent == null) {
			// nothing to export: skip before an id and a metadata row are produced
			println "Warning: no '$textClass' element in an article of $htmlFile, article skipped."
			continue
		}

		// the article title identifies a text (used for duplicate detection)
		def titleNode = textMetadata["titreArticle"]
		String sign = (titleNode == null) ? null : getText(titleNode)

		// build text id and choose the target file
		itext++
		String textId = name+"_"+formater.format(itext)
		File xmlFile
		if (sign != null && allTitles.contains(sign)) {
			ignored << sign
			xmlFile = new File(duplicates, textId+".xml")
			ignoreds << xmlFile.getName()
		} else {
			xmlFile = new File(outputDir, textId+".xml")
			if (sign != null) {
				allTitles.add(sign)
				dones[sign] = xmlFile.getName() // reported as the kept text for later duplicates
			}
		}

		def rubrique = "NA"
		def date = "NA"
		def words = "NA"
		def pages = "NA"
		def ref = null

		// write metadata
		metadataWriter.print "$textId"
		for (def k : textMetadata.keySet()) {
			def metadataNode = textMetadata[k]
			String value = (metadataNode == null) ? null : getText(metadataNode)
			if (value == null) value = "N/A"

			if (k == "DocHeader") { // rubrique, date, words, pages
				def fullHeader = (value =~ fullHeaderPattern)
				if (fullHeader.size() == 1) {
					rubrique = fullHeader[0][1].trim()
					date = toIsoDate(fullHeader[0][2])
					words = fullHeader[0][5]
					pages = fullHeader[0][6]
				} else { // date only
					def dateOnly = (value =~ dateOnlyPattern)
					if (dateOnly.size() == 1) {
						date = toIsoDate(dateOnly[0][2])
					}
				}
			} else if (k == "DocPublicationName") {
				ref = value.trim()
			}

			metadataWriter.print csvCell(escapeCell(value.replace("\n", "").trim()))
		}
		def textorder = date // texts are ordered by date
		ref = ref+", "+date
		metadataWriter.print csvCell(escapeCell(rubrique))
		metadataWriter.print csvCell(escapeCell(date))
		metadataWriter.print csvCell(words)
		metadataWriter.print csvCell(pages)
		metadataWriter.print csvCell(textorder)
		metadataWriter.println ""

		// write content
		textContent.name = "text" // set root tag to "text"
		textContent["@id"] = textId // set the text id
		textContent["@ref"] = ref

		// withPrintWriter closes the file even if printing fails
		xmlFile.withPrintWriter("UTF-8") { writer ->
			writer.println "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
			new XmlNodePrinter(writer).print(textContent)
		}
	}
}
|
| 483 |
|
|
| 484 |
metadataWriter.close()

// Report the texts whose title was already seen (their XML went to the duplicates directory)
if (ignored.size() > 0) {
	File ignoredFile = new File (duplicates, "ignored.txt")
	ignoredFile.withWriter("UTF-8") { writer ->
		writer.println "TOTAL: "+ignored.size()
		for (int i = 0 ; i < ignored.size() ; i++) {
			def sign = ignored[i]
			writer.println "\n**DUPLICATE\n "
			writer.println "keeped="+dones[sign]
			writer.println "duplicates="+ignoreds[i]
			writer.println "SIGN="+sign
			writer.println "\n"
		}
	}
	println "TOTAL IGNORED: "+ignored.size()
	println " see $ignoredFile for text IDs"
}

println "$itext articles found."

// The debug option is documented as "keep temporary results":
// temporary directories are removed only when debug is off.
if (!debug) {
	tmpXhtmlOutput.deleteDir()
	tmpDir.deleteDir()
}
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/debug/PreferencesMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
// STANDARD DECLARATIONS |
|
| 2 |
package org.txm.macro.debug |
|
| 3 |
|
|
| 4 |
import org.kohsuke.args4j.* |
|
| 5 |
import groovy.transform.Field |
|
| 6 |
import org.txm.rcp.swt.widget.parameters.* |
|
| 7 |
import org.txm.core.preferences.TXMPreferences |
|
| 8 |
import org.txm.searchengine.cqp.CQPPreferences |
|
| 9 |
|
|
| 10 |
//org.txm.core.preferences.TXMPreferences.dump(); |
|
| 11 |
|
|
| 12 |
// Print the CQI_SERVER_PATH_TO_CQPLIB preference read from the CQP preferences node
def cqpLibPath = TXMPreferences.getString(CQPPreferences.CQI_SERVER_PATH_TO_CQPLIB, CQPPreferences.PREFERENCES_NODE)
println cqpLibPath
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/PlotEllipsesMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
// STANDARD DECLARATIONS |
|
| 2 |
package org.txm.macroproto |
|
| 3 |
|
|
| 4 |
import org.kohsuke.args4j.* |
|
| 5 |
|
|
| 6 |
import groovy.transform.Field |
|
| 7 |
|
|
| 8 |
import org.txm.ca.core.functions.CA |
|
| 9 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
| 10 |
import org.txm.statsengine.r.core.RWorkspace |
|
| 11 |
|
|
| 12 |
// BEGINNING OF PARAMETERS |
|
| 13 |
|
|
| 14 |
// This macro only works on a CA result selected in the Corpus view
if (!(corpusViewSelection instanceof CA)) {
	println "Selection is not a CA. Please select a CA result in the Corpus view"
	return;
}

// Output image file
@Field @Option(name="outputFile", usage="an example file", widget="FileSave", required=true, def="file.svg")
def outputFile

// Value passed to the 'ellipse' argument of ellipseCA
@Field @Option(name="draw", usage="'row' or 'col'", widget="String", required=true, def="row")
def draw

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// END OF PARAMETERS

// Symbol naming the selected CA, as returned by getSymbol()
def caSymbol = corpusViewSelection.getSymbol()
def rWorkspace = RWorkspace.getRWorkspaceInstance()

// R code: plot the CA, then call ellipseCA on it
def rScript = "\nplot(${caSymbol});\nellipseCA(${caSymbol}, ellipse=c(\"${draw}\"));\n"

rWorkspace.plot(outputFile, rScript);

println "Done: "+outputFile.absolutePath
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/stats/PlotEllipsesMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
// STANDARD DECLARATIONS |
|
| 2 |
package org.txm.macro |
|
| 3 |
|
|
| 4 |
import org.kohsuke.args4j.* |
|
| 5 |
import groovy.transform.Field |
|
| 6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
| 7 |
import org.txm.functions.ca.CA |
|
| 8 |
import org.txm.stat.engine.r.RWorkspace |
|
| 9 |
// BEGINNING OF PARAMETERS |
|
| 10 |
|
|
| 11 |
// This macro only works on a CA result selected in the Corpus view
if (!(corpusViewSelection instanceof CA)) {
	println "selection is not a CA. Please select a CA result in the Corpus view"
	return;
}

// Output image file
@Field @Option(name="outputFile", usage="an example file", widget="FileSave", required=true, def="file.svg")
def outputFile

// Value passed to the 'ellipse' argument of ellipseCA
@Field @Option(name="draw", usage="'row' or 'col'", widget="String", required=true, def="row")
def draw

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// END OF PARAMETERS

// Symbol naming the selected CA, as returned by getSymbol()
def caSymbol = corpusViewSelection.getSymbol()
def rWorkspace = RWorkspace.getRWorkspaceInstance()

// R code: plot the CA, then call ellipseCA on it
def rScript = "\nplot(${caSymbol});\nellipseCA(${caSymbol}, ellipse=c(\"${draw}\"));\n"

rWorkspace.plot(outputFile, rScript);

println "Done: "+outputFile.absolutePath
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/ApplyXQueryMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
// STANDARD DECLARATIONS |
|
| 2 |
package org.txm.macro |
|
| 3 |
|
|
| 4 |
|
|
| 5 |
import org.kohsuke.args4j.* |
|
| 6 |
import groovy.transform.Field |
|
| 7 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
| 8 |
import org.txm.searchengine.cqp.corpus.* |
|
| 9 |
import net.sf.saxon.* |
|
| 10 |
import net.sf.saxon.query.* |
|
| 11 |
import net.sf.saxon.om.* |
|
| 12 |
import javax.xml.transform.* |
|
| 13 |
import javax.xml.transform.sax.* |
|
| 14 |
import javax.xml.transform.stream.* |
|
| 15 |
import org.xml.sax.* |
|
| 16 |
import javax.xml.xpath.* |
|
| 17 |
import net.sf.saxon.event.* |
|
| 18 |
import org.w3c.dom.* |
|
| 19 |
import net.sf.saxon.s9api.* |
|
| 20 |
import javax.xml.parsers.* |
|
| 21 |
|
|
| 22 |
// BEGINNING OF PARAMETERS |
|
| 23 |
|
|
| 24 |
// The macro works on the corpus selected in the Corpus view
if (!(corpusViewSelection instanceof Corpus)) {
	println "Error: Selection must be a corpus"
	return false;
}

@Field @Option(name="xqFile", usage="a Xquery file", widget="FileOpen", required=true, def="C:/Temp/foo.xq")
def xqFile
@Field @Option(name="outFile", usage="optional output file", widget="File", required=false, def="")
def outFile
@Field @Option(name="debug", usage="an example file", widget="Boolean", required=true, def="false")
def debug

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// END OF PARAMETERS

println "corpora selection: "+corpusViewSelection
if (!xqFile.getName().endsWith(".xq")) {
	// report the extension and the parameter that are actually checked
	println "Error: Xquery selected file is not a '.xq' file: $xqFile"
	return false;
}

// XML-TXM files of the corpus: <corpus base directory>/txm/<corpus name>
MainCorpus mainCorpus = ((Corpus)corpusViewSelection).getMainCorpus();
File binDir = mainCorpus.getBaseDirectory();
File txmDir = new File(binDir, "txm/"+mainCorpus.getName());

if (!txmDir.exists()) {
	println "Error: the 'txm' directory does not exist: $txmDir"
	return false;
}

def xmlFiles = txmDir.listFiles();
if (xmlFiles == null || xmlFiles.size() == 0) {
	println "Error: no file found in $txmDir"
	return false;
}

// NOTE(review): the content of xqFile is never read; the query below is hard-coded.
// Confirm whether the selected file is meant to replace it.
String query = """<matches>
{
for \$t in fn:collection('$txmDir')
for \$w in \$t//tei:w
let \$pos := \$w/txm:ana[@type="#frpos"]/text()
return <match>{\$w/@id}</match>
}
</matches>
"""

// Compile the query with Saxon (s9api), declaring the prefixes it uses
Processor processor = new Processor(false)
XQueryCompiler xqc = processor.newXQueryCompiler()
xqc.declareNamespace("tei", "http://www.tei-c.org/ns/1.0")
xqc.declareNamespace("txm", "http://textometrie.org/1.0")
xqc.declareNamespace("fn", "http://www.w3.org/2005/xpath-functions")
XQueryExecutable exp = xqc.compile(query)

// Run the query into a namespace-aware DOM document
DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
dfactory.setNamespaceAware(true);
Document dom = dfactory.newDocumentBuilder().newDocument();
exp.load().run(new DOMDestination(dom));

// Write the result to outFile when one was given, otherwise to the console
if (outFile instanceof File && outFile.getName().length() > 0) {
	// withWriter closes the file even if writing fails
	outFile.withWriter("UTF-8") { writer ->
		writer.println dom.getDocumentElement()
	}
	println "Result written in "+outFile.getAbsolutePath()
} else {
	println dom.getDocumentElement()
}
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/XSL2CQPMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
// STANDARD DECLARATIONS |
|
| 2 |
package org.txm.macro |
|
| 3 |
|
|
| 4 |
import org.kohsuke.args4j.* |
|
| 5 |
import groovy.transform.Field |
|
| 6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
| 7 |
import org.txm.searchengine.cqp.corpus.* |
|
| 8 |
import org.txm.importer.ApplyXsl2; |
|
| 9 |
import groovy.util.XmlParser |
|
| 10 |
|
|
| 11 |
// BEGINNING OF PARAMETERS |
|
| 12 |
|
|
| 13 |
// The macro works on the corpus selected in the Corpus view
if (!(corpusViewSelection instanceof Corpus)) {
	println "Error: Selection must be a corpus"
	return false;
}

@Field @Option(name="xslFile", usage="an example file", widget="FileOpen", required=true, def="C:/Temp/foo.txt")
def xslFile
@Field @Option(name="debug", usage="an example file", widget="Boolean", required=true, def="false")
def debug

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// END OF PARAMETERS

println "corpora selection: "+corpusViewSelection
if (!xslFile.getName().endsWith(".xsl")) {
	println "Error: XSL selected file is not a '.xsl' file: $xslFile"
	return false;
}

// XML-TXM files of the corpus: <corpus base directory>/txm/<corpus name>
MainCorpus mainCorpus = ((Corpus)corpusViewSelection).getMainCorpus();
File binDir = mainCorpus.getBaseDirectory();
File txmDir = new File(binDir, "txm/"+mainCorpus.getName());
File resultsDir = new File(binDir, "results");
resultsDir.mkdirs()

if (!txmDir.exists()) {
	println "Error: the 'txm' directory does not exist: $txmDir"
	return false;
}

def xmlFiles = txmDir.listFiles();
if (xmlFiles == null || xmlFiles.size() == 0) {
	println "Error: no file found in $txmDir"
	return false;
}

// Each match is the list of the word ids found in its wRef children
HashSet<List<String>> allmatches = new HashSet<List<String>>();
ApplyXsl2 a = new ApplyXsl2(xslFile.getAbsolutePath());
println "Querying..."
for (File xmlFile : xmlFiles) {
	println " "+xmlFile.getName()
	File resultFile = new File(resultsDir, "xslqueryresult_"+xmlFile.getName());
	a.process(xmlFile, resultFile);

	def matches = new XmlParser().parse(resultFile)
	matches.match.each() { match ->
		def l = [];
		match.wRef.each() { l.add(it.attribute("id")); }
		allmatches << l
	}
	if (!debug) resultFile.delete()
}

// One CQL alternative per match: the first word by id, then one [] per following word
def subqueries = []
for (def m : allmatches) {
	int length = m.size() // size() must be called: m.size is a property access on the list elements
	if (length == 0) continue // a match without wRef gives nothing to anchor on
	if (length == 1)
		subqueries << "[id=\""+m[0]+"\"]"
	else if (length == 2)
		subqueries << "[id=\""+m[0]+"\"][]"
	else
		subqueries << "[id=\""+m[0]+"\"]"+"[]{"+(length-1)+"}"
}
def query = subqueries.join("|")
def initialquery = query

// Keep the CQL under 1500 characters by dropping trailing alternatives one at a time
int dropped = 0
while (query.length() > 1500) {
	int cut = query.lastIndexOf("|")
	if (cut < 0) break // a single alternative cannot be shortened
	query = query.substring(0, cut)
	dropped++
}
if (dropped > 0) println "Warning : query has been truncated: "+dropped+" alternative(s) dropped"

println "CQL: $query"
if (debug) println "See debug files in: "+resultsDir
if (initialquery != query) println "Initial CQL: $initialquery"
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/partition/PartsSizeMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
// STANDARD DECLARATIONS |
|
| 2 |
package org.txm.macro |
|
| 3 |
|
|
| 4 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
| 5 |
|
|
| 6 |
// A partition must be selected in the Corpus view
if (!(corpusViewSelection instanceof Partition)) {
	println "Select a partition before calling this macro."
	return;
}
Partition selectedPartition = corpusViewSelection

// One line per part: name <TAB> size
selectedPartition.getParts().each { part ->
	println part.getName()+"\t"+part.getSize()
}
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/file/DirectoryInfoMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
|
|
| 4 |
import org.kohsuke.args4j.* |
|
| 5 |
import groovy.transform.Field |
|
| 6 |
import java.nio.file.Path |
|
| 7 |
import java.nio.file.attribute.FileOwnerAttributeView |
|
| 8 |
import java.nio.file.attribute.UserPrincipal |
|
| 9 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
| 10 |
import org.txm.Toolbox |
|
| 11 |
import org.txm.searchengine.cqp.* |
|
| 12 |
import java.io.IOException; |
|
| 13 |
import java.nio.file.* |
|
| 14 |
import java.nio.file.attribute.*; |
|
| 15 |
|
|
| 16 |
// Parameter declaration: the directory whose properties are reported
// NOTE(review): the default value "set da enpos;" does not look like a folder path - confirm
@Field @Option(name="directory", usage="the directory to diagnose", widget="Folder", required=true, def="set da enpos;")
File directory;

// Parameters settings UI: stop the macro when ParametersDialog.open() returns false
if (!ParametersDialog.open(this)) {
	println("** DirectoryInfoMacro error: Impossible to open Parameters settings UI dialog box.")
	return
}
|
| 25 |
|
|
| 26 |
// Report what java.io.File knows about the selected directory
println "full path="+directory.getAbsolutePath()
println " exists? "+directory.exists()
println " read? "+directory.canRead()
println " write? "+directory.canWrite()
println " executable? "+directory.canExecute()
println " hidden? "+directory.isHidden()

// listFiles() returns null when the path is not a directory or when an I/O
// error occurs, so the count is only printed when a listing is available
def files = directory.listFiles()
if (files == null) {
	println " number of files? n/a (not a directory or not readable)"
} else {
	println " number of files? "+files.size()
}
|
| 34 |
|
|
| 35 |
|
|
| 36 |
// NIO path of the selected directory, used to query the attribute views
Path path = Paths.get(directory.getAbsolutePath());

// Owner of the directory, printed only when the owner view and the owner are available
FileOwnerAttributeView ownerAttributeView = Files.getFileAttributeView(path, FileOwnerAttributeView.class);
UserPrincipal directoryOwner = ownerAttributeView?.getOwner()
if (directoryOwner != null) {
	println " file owner attribute: "+directoryOwner.getName()
}
|
| 43 |
|
|
| 44 |
// ACL entries of the directory, one line per entry, when the ACL view is available
AclFileAttributeView aclAttributeView = Files.getFileAttributeView(path, AclFileAttributeView.class);
if (aclAttributeView != null) {
	List<AclEntry> aclEntries = aclAttributeView.getAcl();
	if (aclEntries != null) {
		aclEntries.each { aclEntry -> println " acl entry: "+aclEntry }
	}
}
|
| 52 |
|
|
| 53 |
// Basic attributes (timestamps, file key and file type flags), printed only
// when the view and its attributes are available
BasicFileAttributeView basicAttributeView = Files.getFileAttributeView(path, BasicFileAttributeView.class);
BasicFileAttributes basicAttributes = basicAttributeView?.readAttributes()
if (basicAttributes != null) {
	// label -> value, in display order
	def basicReport = [
		"creation time": basicAttributes.creationTime(),
		"last access time": basicAttributes.lastAccessTime(),
		"last modification time": basicAttributes.lastModifiedTime(),
		"file key": basicAttributes.fileKey(),
		"directory file?": basicAttributes.isDirectory(),
		"symbolic link?": basicAttributes.isSymbolicLink(),
		"regular file?": basicAttributes.isRegularFile()
	]
	basicReport.each { label, value -> println " basic attributes: "+label+": "+value }
}
|
| 66 |
|
|
| 67 |
// DOS attributes, printed only when the DOS view and its attributes are available
DosFileAttributeView dosAttributeView = Files.getFileAttributeView(path, DosFileAttributeView.class);
if (dosAttributeView != null) {
	DosFileAttributes dosAttributes = dosAttributeView.readAttributes()
	if (dosAttributes != null) {
		// prints one "label: value" line with the common prefix
		def reportDos = { String label, Object value -> println " dos attributes: ${label}: ${value}" }
		reportDos("creation time", dosAttributes.creationTime())
		reportDos("last access time", dosAttributes.lastAccessTime())
		reportDos("last modification time", dosAttributes.lastModifiedTime())
		reportDos("file key", dosAttributes.fileKey())
		reportDos("directory file?", dosAttributes.isDirectory())
		reportDos("symbolic link?", dosAttributes.isSymbolicLink())
		reportDos("regular file?", dosAttributes.isRegularFile())
		reportDos("archive file?", dosAttributes.isArchive())
		reportDos("system file?", dosAttributes.isSystem())
	}
}
|
| 82 |
|
|
| 83 |
// POSIX attributes (group, timestamps, file type flags and sorted permissions),
// printed only when the POSIX view and its attributes are available
PosixFileAttributeView posixAttributeView = Files.getFileAttributeView(path, PosixFileAttributeView.class);
PosixFileAttributes posixAttributes = posixAttributeView?.readAttributes()
if (posixAttributes != null) {
	// [label, value] rows, in display order
	def posixRows = [
		["group", posixAttributes.group()],
		["creation time", posixAttributes.creationTime()],
		["last access time", posixAttributes.lastAccessTime()],
		["last modification time", posixAttributes.lastModifiedTime()],
		["file key", posixAttributes.fileKey()],
		["directory file?", posixAttributes.isDirectory()],
		["symbolic link?", posixAttributes.isSymbolicLink()],
		["regular file?", posixAttributes.isRegularFile()],
		["permissions", posixAttributes.permissions().sort()]
	]
	for (def row : posixRows) {
		println " posix attributes: "+row[0]+": "+row[1]
	}
}
|
| 98 |
|
|
| 99 |
// Names of the user-defined attributes, one line per name, when the view is available
UserDefinedFileAttributeView userdefinedAttributeView = Files.getFileAttributeView(path, UserDefinedFileAttributeView.class);
def userAttributeNames = userdefinedAttributeView?.list()
userAttributeNames?.each { attributeName ->
	println " user defined attributes: "+attributeName
}
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/file/SetFileRightsMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
// STANDARD DECLARATIONS |
|
| 2 |
package org.txm.macro |
|
| 3 |
|
|
| 4 |
import org.kohsuke.args4j.* |
|
| 5 |
import groovy.transform.Field |
|
| 6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
| 7 |
|
|
| 8 |
// BEGINNING OF PARAMETERS |
|
| 9 |
|
|
| 10 |
// File whose access rights are changed
@Field @Option(name="file", usage="the file whose rights are set", widget="FileOpen", required=true, def="C:/Temp/foo.txt")
def file

// Rights to grant (true) or to remove (false)
@Field @Option(name="read_right", usage="read", widget="Boolean", required=true, def="true")
def read_right
@Field @Option(name="write_right", usage="write", widget="Boolean", required=true, def="true")
def write_right
@Field @Option(name="execute_right", usage="execute", widget="Boolean", required=true, def="true")
def execute_right

// Passed as the second argument of File.setReadable/setWritable/setExecutable (ownerOnly)
@Field @Option(name="current_user_only", usage="apply the rights to the file owner only", widget="Boolean", required=true, def="false")
def current_user_only

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;
|
| 25 |
|
|
| 26 |
// END OF PARAMETERS |
|
| 27 |
|
|
| 28 |
// Apply the requested rights. Each java.io.File setter returns false when the
// operation did not succeed; the failure is reported instead of being ignored.
if (file.exists()) {

	if (!file.setReadable(read_right, current_user_only)) {
		println "Warning: could not set the read right of $file"
	}
	if (!file.setWritable(write_right, current_user_only)) {
		println "Warning: could not set the write right of $file"
	}
	if (!file.setExecutable(execute_right, current_user_only)) {
		println "Warning: could not set the execute right of $file"
	}

} else {
	println "Error: file not found $file"
}
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/ExecPythonMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
// STANDARD DECLARATIONS |
|
| 2 |
package org.txm.macro |
|
| 3 |
|
|
| 4 |
import org.kohsuke.args4j.* |
|
| 5 |
import groovy.transform.Field |
|
| 6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
| 7 |
|
|
| 8 |
// BEGINNING OF PARAMETERS |
|
| 9 |
|
|
| 10 |
// Declare each parameter here |
|
| 11 |
// (available widget types: Query, File, Folder, String, Text, Boolean, Integer, Float and Date) |
|
| 12 |
|
|
| 13 |
// Python script to run
@Field @Option(name="pythonFile", usage="the Python script to execute", widget="FileOpen", required=true, def="script.py")
def pythonFile

// Parameters settings UI: stop the macro when ParametersDialog.open() returns false
if (!ParametersDialog.open(this)) {
	println("** ExecPythonMacro error: Impossible to open Parameters settings UI dialog box.")
	return
}
|
| 21 |
|
|
| 22 |
// Run the script with the "python" command. The command is given as a list so
// that a script path containing spaces is passed as one single argument.
def process = ["python", pythonFile.toString()].execute()

// Drain stdout and stderr together and wait for the process to end: reading
// stdout only can block when the stderr pipe fills up, and exitValue() throws
// IllegalThreadStateException while the process is still running.
def processOutput = new StringBuilder()
def processErrors = new StringBuilder()
process.waitForProcessOutput(processOutput, processErrors)
processOutput.toString().eachLine {println it}
processErrors.toString().eachLine {println it}

def exitValue = process.exitValue()
if (exitValue != 0) println "Error during execution: $exitValue"
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ExecXSLDOMMacro.groovy (revision 1543) | ||
|---|---|---|
| 1 |
package org.txm.macro.xml; |
|
| 2 |
// STANDARD DECLARATIONS |
|
| 3 |
|
|
| 4 |
import org.kohsuke.args4j.* |
|
| 5 |
import groovy.transform.Field |
|
| 6 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
| 7 |
import org.txm.importer.ApplyXsl2; |
|
| 8 |
import javax.xml.transform.stream.* |
|
| 9 |
import javax.xml.transform.dom.DOMResult |
|
| 10 |
import org.w3c.dom.* |
|
| 11 |
|
|
| 12 |
|
|
| 13 |
// BEGINNING OF PARAMETERS |
|
| 14 |
// XSL stylesheet applied to every input file
@Field @Option(name="XSLFile", usage="the XSL stylesheet to apply", widget="File", required=true, def="file.xsl")
def XSLFile = new File(System.getProperty("user.home"),"TXM/xsl/identity.xsl")

// Directory scanned for *.xml / *.XML files ("intputDirectory" is the declared parameter name)
@Field @Option(name="intputDirectory", usage="the directory containing the XML files to process", widget="Folder", required=true, def="in")
def intputDirectory = new File(System.getProperty("user.home"),"xml/TESTS2/xml")

// XSL parameters (name -> value); the dialog option is disabled, edit the map here
//@Field @Option(name="parameters", usage="an example folder", widget="Text", required=false, def="")
def parameters = [:]

@Field @Option(name="dom", usage="XSLT Result is - true: a DOM Element. false - a XSLT Result is XMLStreamReader", widget="Boolean", required=true, def="true")
def dom

@Field @Option(name="debug", usage="Show debug messages, value = true|false", widget="Boolean", required=true, def="false")
def debug

if (!ParametersDialog.open(this)) return;
|
| 30 |
// END OF PARAMETERS |
|
| 31 |
|
|
| 32 |
// USER MANIPULATIONS |
|
| 33 |
|
|
| 34 |
/**
 * Hook called for each file transformed in DOM mode ("dom" is true).
 * This implementation prints the input file name and the tag name of the result element.
 *
 * @param inputXMLFile the XML file that was transformed
 * @param resultnode the document element of the transformation result
 */
def processDOMResult(File inputXMLFile, def resultnode) {
	// with resultnode a Element : https://docs.oracle.com/javase/8/docs/api/org/w3c/dom/Element.html
	println inputXMLFile.getName()+" -> "+ resultnode.getTagName()
}

/**
 * Hook called for each file transformed in non-DOM mode ("dom" is false).
 * The main loop calls it, so it must exist; this implementation prints the
 * input file name and the result object.
 *
 * @param inputXMLFile the XML file that was transformed
 * @param result the object returned by process() for this file
 */
def processSaxResult(File inputXMLFile, def result) {
	println inputXMLFile.getName()+" -> "+ result
}
|
| 38 |
|
|
| 39 |
// END USER MANIPULATIONS |
|
| 40 |
|
|
| 41 |
println "Use XSL $XSLFile with parameters $parameters"
println "Processed directory: $intputDirectory"

// Transform every XML file of the input directory and hand each result to the
// user hook matching the selected mode. A failure on one file is reported and
// does not stop the processing of the remaining files.
def files = []
ApplyXsl2 a = new ApplyXsl2(XSLFile.getAbsolutePath());
intputDirectory.eachFileMatch(~/.+\.(xml|XML)/) { XMLFile ->
	String name = XMLFile.getName()
	try {
		// pass the announced parameters instead of a fresh empty map
		def result = process(a, XMLFile, parameters);
		if (dom) processDOMResult(XMLFile, result.getNode().getDocumentElement());
		else processSaxResult(XMLFile, result);
		files << XMLFile
	} catch (Exception e) {
		println "Warning: XSL transformation of '$name' failed with error=$e"
		if (debug) e.printStackTrace();
	}
}
|
| 58 |
|
|
| 59 |
/**
 * Applies the stylesheet loaded in the given ApplyXsl2 to one XML file.
 *
 * @param a the XSL processor holding the transformer
 * @param inputXMLFile the XML file to transform
 * @param args XSL parameters (name -> value) set on the processor before the transformation
 * @return a DOMResult when "dom" is true, otherwise a StreamResult backed by an in-memory StringWriter
 * @throws Exception when a parameter cannot be set or when the transformation fails
 */
def process(ApplyXsl2 a, File inputXMLFile, def args) throws Exception {
	for (String k : args.keySet()) {
		// the parameters belong to the XSL processor, not to this script
		if (!a.setParam(k, args[k])) {
			// fail with an explicit message: the caller expects a Result, not a boolean
			throw new IllegalArgumentException("Could not set XSL parameter '"+k+"'")
		}
	}

	def result = null;
	if (dom) {
		result = new DOMResult();
	} else {
		// NOTE(review): the previous stream branch did not compile (unbalanced
		// parenthesis, undefined output file); the serialized output is kept in
		// memory here - confirm this is the intended non-DOM result
		result = new StreamResult(new StringWriter());
	}
	a.transformer.transform(new StreamSource(inputXMLFile), result);
	a.cleanMemory(); // save memory
	a.resetParams()
	return result;
}
|
Formats disponibles : Unified diff