Révision 1543

tmp/org.txm.groovy.core/src/groovy/org/txm/macro/conversion/EuroPressToXML2018Macro.groovy (revision 1543)
1
package org.txm.macro.conversion
2
// STANDARD DECLARATIONS
3

  
4
import groovy.xml.QName
5
import java.text.DecimalFormat
6
import org.txm.importer.DomUtils
7
import org.txm.importer.ValidateXml
8
import org.w3c.tidy.Tidy
9
import groovy.util.XmlParser
10
import org.kohsuke.args4j.*
11
import groovy.transform.Field
12
import org.txm.rcpapplication.swt.widget.parameters.*
13
import org.jsoup.Jsoup
14
import org.jsoup.nodes.Document.OutputSettings.Syntax
15

  
16
// BEGINNING OF PARAMETERS
17

  
18
@Field @Option(name="inputDir", usage="The directory containing the html files, to export from the Europress portal", widget="Folder", required=true, def="")
19
def inputDir
20

  
21
@Field @Option(name="inputEncoding", usage="character encoding used in the HTML exported files", widget="String", required=false, def="iso-8859-1")
22
String inputEncoding
23

  
24
@Field @Option(name="outputDir", usage="The directory containing the result files, to import with the XTZ+CSV import module into TXM", widget="Folder", required=true, def="")
25
def outputDir
26

  
27
@Field @Option(name="corpusName", usage="corpus name", widget="String", required=true, def="")
28
String corpusName
29

  
30
@Field @Option(name="columnSeparator",usage="", widget="String", required=false, def=",")
31
def columnSeparator
32

  
33
@Field @Option(name="txtSeparator",usage="", widget="String", required=false, def="\"")
34
def txtSeparator
35

  
36
@Field @Option(name="debug", usage="show debug messages and keep temporary results", widget="Boolean", required=false, def="false")
37
def debug
38

  
39
// Open the parameters input dialog box
40
if (!ParametersDialog.open(this)) return
41

  
42
// END OF PARAMETERS
43

  
44
if (!inputDir.exists()) {
45
	println "** inputDir does not exist: $inputDir, aborting."
46
	return false
47
}
48

  
49
xslposttokContent = """<?xml version="1.0"?>
50
<xsl:stylesheet xmlns:edate="http://exslt.org/dates-and-times"
51
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:tei="http://www.tei-c.org/ns/1.0"
52
  xmlns:txm="http://textometrie.org/ns/1.0"
53
  exclude-result-prefixes="tei edate" xpath-default-namespace="http://www.tei-c.org/ns/1.0" version="2.0">
54

  
55
  <!--
56
This software is dual-licensed:
57

  
58
1. Distributed under a Creative Commons Attribution-ShareAlike 3.0
59
Unported License http://creativecommons.org/licenses/by-sa/3.0/ 
60

  
61
2. http://www.opensource.org/licenses/BSD-2-Clause
62
		
63
All rights reserved.
64

  
65
Redistribution and use in source and binary forms, with or without
66
modification, are permitted provided that the following conditions are
67
met:
68

  
69
* Redistributions of source code must retain the above copyright
70
notice, this list of conditions and the following disclaimer.
71

  
72
* Redistributions in binary form must reproduce the above copyright
73
notice, this list of conditions and the following disclaimer in the
74
documentation and/or other materials provided with the distribution.
75

  
76
This software is provided by the copyright holders and contributors
77
"as is" and any express or implied warranties, including, but not
78
limited to, the implied warranties of merchantability and fitness for
79
a particular purpose are disclaimed. In no event shall the copyright
80
holder or contributors be liable for any direct, indirect, incidental,
81
special, exemplary, or consequential damages (including, but not
82
limited to, procurement of substitute goods or services; loss of use,
83
data, or profits; or business interruption) however caused and on any
84
theory of liability, whether in contract, strict liability, or tort
85
(including negligence or otherwise) arising in any way out of the use
86
of this software, even if advised of the possibility of such damage.
87

  
88
     
89
This stylesheet adds a ref attribute to w elements that will be used for
90
references in TXM concordances. Can be used with TXM XTZ import module.
91

  
92
w ref is composed of :
93
- docpublicationname
94
- date
95

  
96
Written by Alexei Lavrentiev, UMR 5317 IHRIM, 2017
97
Serge Heiden, UMR 5317 IHRIM, 2018
98
  -->
99

  
100

  
101
  <xsl:output method="xml" encoding="utf-8" omit-xml-declaration="no"/> 
102
  
103
  
104
  <!-- General patterns: all elements, attributes, comments and processing instructions are copied -->
105
  
106
  <xsl:template match="*">      
107
        <xsl:copy>
108
          <xsl:apply-templates select="*|@*|processing-instruction()|comment()|text()"/>
109
        </xsl:copy>    
110
  </xsl:template>
111
  
112
  <xsl:template match="*" mode="position"><xsl:value-of select="count(preceding-sibling::*)"/></xsl:template>
113

  
114
  <xsl:template match="@*|comment()|processing-instruction()">
115
    <xsl:copy/>
116
  </xsl:template>
117
  
118
  <xsl:template match="*:w">
119
    <xsl:variable name="ref">
120

  
121
	<xsl:choose>
122
         <xsl:when test="ancestor::*:text[1]/@ref">
123
           <!-- <xsl:text>ref: </xsl:text> -->
124
	   <xsl:value-of select="ancestor::*:text[1]/@ref"/>
125
         </xsl:when>
126
         <xsl:otherwise>
127
          <!-- <xsl:text>[NO docpublicationname]</xsl:text> -->
128
         </xsl:otherwise>
129
       </xsl:choose>
130
	<xsl:choose>
131
         <xsl:when test="ancestor::*:text[1]/@docpublicationname">
132
           <!-- <xsl:text>docpublicationname: </xsl:text> -->
133
	   <xsl:value-of select="ancestor::*:text[1]/@docpublicationname"/>
134
         </xsl:when>
135
         <xsl:otherwise>
136
          <!-- <xsl:text>[NO docpublicationname]</xsl:text> -->
137
         </xsl:otherwise>
138
       </xsl:choose>
139
	<xsl:choose>
140
         <xsl:when test="ancestor::*:text[1]/@date">
141
           <!-- <xsl:text>date: </xsl:text> -->
142
	   <xsl:value-of select="ancestor::*:text[1]/@date"/>
143
         </xsl:when>
144
         <xsl:otherwise>
145
          <!-- <xsl:text>[NO date]</xsl:text> -->
146
         </xsl:otherwise>
147
       </xsl:choose>
148

  
149
<!--
150
      <xsl:if test="ancestor::*:text/@*:id and preceding::*:pb[1]/@n">
151
        <xsl:text>, </xsl:text>
152
      </xsl:if>
153
-->
154
      <xsl:if test="ancestor::*:p[1]/@n">
155
        <xsl:text>§ </xsl:text>
156
        <xsl:value-of select="ancestor::*:p[1]/@n"/>
157
      </xsl:if>
158
<!--
159
      <xsl:if test="preceding::*:pb[1]/@n">
160
        <xsl:text>p. </xsl:text>
161
        <xsl:value-of select="preceding::*:pb[1]/@n"/>
162
      </xsl:if>      <xsl:if test="(ancestor::*:text/@*:id or preceding::*:pb[1]/@n) and preceding::*:lb[1]/@n">
163
        <xsl:text>, </xsl:text>
164
      </xsl:if>
165
      <xsl:if test="preceding::*:lb[1]/@n">
166
        <xsl:text>l. </xsl:text>
167
        <xsl:value-of select="preceding::*:lb[1]/@n"/>
168
      </xsl:if>
169
-->
170
    </xsl:variable>
171
        <xsl:copy>
172
          <xsl:apply-templates select="@*"/>
173
          <xsl:attribute name="ref"><xsl:value-of select="\$ref"/></xsl:attribute>
174
          <xsl:apply-templates select="*|processing-instruction()|comment()|text()"/>
175
        </xsl:copy>
176
  </xsl:template>  
177

  
178
</xsl:stylesheet>
179
"""
180

  
181
cssContent = """/*  
182
   Copyright © 2017 ENS de Lyon, CNRS, University of Franche-Comté
183
   Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
184
   @author cbourdot
185
   @author sheiden
186

  
187
   TXM default CSS 06-2017
188

  
189
*/
190

  
191
.txmeditionpage {
192
	font-size: 14px;
193
	text-indent: none;
194
	text-align: justify;
195
	box-shadow: .3125em .3125em .625em 0 #888;
196
	margin: 1.25em auto;
197
	padding: 1.25em;
198
	width: 400px;
199
	min-height: 90%;
200
}
201

  
202
.txmeditionpb {
203
	text-align: center;
204
}
205

  
206
.txmeditionpb::before {
207
	content: "- ";
208
}
209

  
210
.txmeditionpb::after {
211
	content: " -";
212
}
213

  
214
.txmlettrinep:first-letter {
215
    float: left;
216
    font-size: 6em;
217
    line-height: 1;
218
    margin-right: 0.2em;
219
}
220

  
221
a {
222
	color:#802520;
223
}
224

  
225
h1 {
226
	font-size: 20px;
227
	font-variant: small-caps;
228
	text-align: center;
229
	color:#802520;
230
}
231

  
232
h2 {
233
	font-size: 18px;
234
	font-variant: small-caps;
235
	text-align: center;
236
	color:#802520;
237
}
238

  
239
h3 {
240
	font-size: 16px;
241
	font-variant: small-caps;
242
	text-align: center;
243
	color:#802520;
244
}
245

  
246
p {
247
    	text-indent: 0.2cm;
248
	text-align: justify;
249
    	text-justify: inter-word;	
250
  }
251

  
252
img {
253
    margin: 10px 10px 10px 10px;
254
}
255

  
256
td[rend="table-cell-align-right"] {
257
	text-align: right;
258
}
259

  
260
td[rend="table-cell-align-left"] {
261
	text-align: left;
262
}
263

  
264
td[rend="table-cell-align-center"] {
265
	text-align: center;
266
}
267
"""
268

  
269
// Empty the selected output directory, then continue inside a sub-directory named after the corpus
outputDir.deleteDir()
outputDir.mkdir()
outputDir = new File(outputDir, corpusName)
outputDir.deleteDir()
outputDir.mkdir()

// Working sub-directories: temporary files, intermediate XHTML files and duplicated texts
tmpDir = new File(outputDir, "tmp")
tmpXhtmlOutput = new File(outputDir, "xhtml")
duplicates = new File(outputDir, "duplicates")
[tmpDir, tmpXhtmlOutput, duplicates].each { workDir ->
	workDir.deleteDir()
	workDir.mkdir()
}
286

  
287
// Resource files written next to the converted texts: the post-tokenization
// stylesheet (xsl/3-posttok) and the corpus CSS (css/<corpusName>.css)
xslDir = new File(outputDir, "xsl")
xslDir.mkdir()
xslposttokDir = new File(xslDir, "3-posttok")
xslposttokDir.mkdir()
xslposttokFile = new File(xslposttokDir, "txm-posttok-addRef-ref.xsl")
cssDir = new File(outputDir, "css")
cssDir.mkdir()
cssFile = new File(cssDir, corpusName+".css")

// Both contents hold non-ASCII characters and the stylesheet has no encoding
// declaration (an XML parser then reads it as UTF-8): write them explicitly in
// UTF-8 instead of relying on the platform default charset.
xslposttokFile.setText(xslposttokContent, "UTF-8")
cssFile.setText(cssContent, "UTF-8")
298

  
299
metadataFile = new File(outputDir, "metadata.csv")
300
metadataWriter = metadataFile.newWriter("UTF-8")
301

  
302
int itext = 0
303
def formater = new DecimalFormat("0000")
304

  
305
// HTML elements containing metadata content, with @class=metadataKeys
306
def metadataKeys = ["DocPublicationName", "DocHeader", "titreArticle"]
307

  
308
// HTML elements containing text content, with @class=textClass
309
def textClass = "docOcurrContainer"
310

  
311
// write metadata header
312
metadataWriter.print "id"
313
metadataKeys.each { metadataWriter.print columnSeparator+it.toLowerCase() }
314
// DocHeader substrings
315
metadataWriter.print columnSeparator+"rubrique"
316
metadataWriter.print columnSeparator+"date"
317
metadataWriter.print columnSeparator+"words"
318
metadataWriter.print columnSeparator+"pages"
319
metadataWriter.print columnSeparator+"textorder" // date
320
metadataWriter.println ""
321
		
322
def files = []
323
inputDir.eachFileMatch(~/.*\.(html|HTML)/){ htmlFile -> files << htmlFile}
324
files = files.sort()
325

  
326
def done = new HashSet<String>()
327
def ignored = []
328
def allTitles = new HashSet()
329
def dones = [:]
330
def ignoreds = []
331

  
332
// scan node for text content
333
/**
 * Collects the text found under a parsed node as a single line.
 *
 * A String is used as is; for any other node the texts of its children() are
 * concatenated recursively. Line breaks are replaced by spaces and the result
 * is trimmed, then prefixed with one space.
 *
 * @param node a String, an object exposing children(), or null
 * @return the flattened text prefixed with a space, or null when node is null
 */
def getText(def node) {
	// a missing element is passed as null by the caller, which tests the result against null
	if (node == null) return null

	String s = " "
	if (node instanceof String) {
		s += " "+node
	} else {
		for (def c : node.children())
			s += " "+getText(c)
	}
	return " "+s.replace("\n", " ").trim()
}
345

  
346
println files.size()+" files to process."
347
println "Creating $metadataFile"
348

  
349
// DocHeader dates are written in French (e.g. "lundi 5 mars 2018"); metadata dates are written as yyyy-MM-dd
def frenchDateFormat = java.text.DateFormat.getDateInstance(java.text.DateFormat.FULL, java.util.Locale.FRANCE)
def isoDateFormat = new java.text.SimpleDateFormat("yyyy-MM-dd")

// Converts a French full date to yyyy-MM-dd, or returns "NA" when it cannot be parsed
def toIsoDate = { String frenchDate ->
	try {
		return isoDateFormat.format(frenchDateFormat.parse(frenchDate))
	} catch (java.text.ParseException e) {
		println "can't parse date: '$frenchDate'"
		return "NA"
	}
}

// Wraps a CSV cell with the text separator, doubling any separator found in the cell.
// String.replace is used (not replaceAll) so the separator is never read as a regular expression.
def quote = { Object cell ->
	String s = String.valueOf(cell)
	return txtSeparator+s.replace(txtSeparator, txtSeparator+txtSeparator)+txtSeparator
}

for (File htmlFile : files) {
	println "Processing $htmlFile"

	String name = htmlFile.getName()
	name = name.substring(0, name.lastIndexOf("."))

	File xhtmlFile = new File(tmpXhtmlOutput, name+".xhtml")

	// 1. parse the HTML export with Jsoup and save it with XML syntax
	def doc
	if (inputEncoding != null && inputEncoding.size() > 0) {
		doc = Jsoup.parse(htmlFile, inputEncoding, "")
	} else {
		doc = Jsoup.parse(htmlFile, "UTF8")
	}
	doc.outputSettings().escapeMode(org.jsoup.nodes.Entities.EscapeMode.xhtml)
	doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml)

	xhtmlFile.withWriter("UTF-8") { out ->
		// the literal sequence "="" is removed from the serialized document
		out.print doc.html().replace("\"=\"\"", "")
	}

	if (!ValidateXml.test(xhtmlFile)) {
		println "Error: $xhtmlFile is malformed."
		continue
	}

	def root = new XmlParser(false, true, true).parse(xhtmlFile)

	// 2. one <article> per text
	for (def article : root.body.article) {
		// first element of each metadata class (null when the article has none)
		def textMetadata = [:]
		for (def key : metadataKeys) {
			textMetadata[key] = article.'**'.find { node -> node instanceof groovy.util.Node && node["@class"] == key }
		}

		def textContent = article.'**'.find { node -> node instanceof groovy.util.Node && node["@class"] == textClass }
		if (textContent == null) {
			// nothing to export: skip before a text number and a metadata row are consumed
			println "Warning: no element with class '$textClass' in an article of $htmlFile, article skipped."
			continue
		}

		// the article title identifies a text; an article without title is never flagged as duplicate
		def titleNode = textMetadata["titreArticle"]
		String sign = (titleNode == null) ? null : getText(titleNode)

		// build text id
		itext++
		String textId = name+"_"+formater.format(itext)
		File xmlFile = new File(outputDir, textId+".xml")
		if (sign != null && allTitles.contains(sign)) {
			ignored << sign
			xmlFile = new File(duplicates, textId+".xml")
			ignoreds << xmlFile.getName()
		} else if (sign != null) {
			allTitles.add(sign)
			// remember which file is kept for this title, for the duplicates report
			dones[sign] = xmlFile.getName()
		}

		def rubrique
		def date
		def words
		def pages
		def textorder
		def ref

		// write metadata
		metadataWriter.print "$textId"
		for (def k : textMetadata.keySet()) {
			def metadataNode = textMetadata[k]
			String value = (metadataNode == null) ? "N/A" : getText(metadataNode)

			if (k == "DocHeader") { // rubrique, date, words, pages
				def fullHeader = (value =~ /^(.*)((lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche) [0-9][0-9]? (janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre) [0-9]{4}) - ([0-9]+) mots, p\. (.*)$/)
				if (fullHeader.size() == 1) {
					rubrique = fullHeader[0][1].trim()
					date = toIsoDate(fullHeader[0][2])
					words = fullHeader[0][5]
					pages = fullHeader[0][6]
				} else { // header reduced to a date, or unknown header format
					def dateOnly = (value =~ /^(.*)((lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche) ([0-9][0-9]?) (janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre) ([0-9]{4}))(.*)$/)
					rubrique = "NA"
					date = (dateOnly.size() == 1) ? toIsoDate(dateOnly[0][2]) : "NA"
					words = "NA"
					pages = "NA"
				}
				textorder = date
			} else if (k == "DocPublicationName") {
				ref = value.trim()
			}

			metadataWriter.print(columnSeparator+quote(value.replace("\n", "").trim()))
		}
		ref = ref+", "+date
		metadataWriter.print(columnSeparator+quote(rubrique))
		metadataWriter.print(columnSeparator+quote(date))
		metadataWriter.print(columnSeparator+quote(words))
		metadataWriter.print(columnSeparator+quote(pages))
		metadataWriter.print(columnSeparator+quote(textorder))
		metadataWriter.println ""

		// write content
		textContent.name = "text" // set root tag to "text"
		textContent["@id"] = textId // set the text id
		textContent["@ref"] = ref

		def writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(xmlFile) , "UTF-8"))
		try {
			writer.println "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
			new XmlNodePrinter(writer).print(textContent)
		} finally {
			// always release the file, even when the printing fails
			writer.close()
		}
	}
}
483

  
484
metadataWriter.close()
485

  
486
// Report the texts whose title was already seen: one entry per duplicate in duplicates/ignored.txt
if (!ignored.isEmpty()) {
	File ignoredFile = new File (duplicates, "ignored.txt")
	ignoredFile.withWriter("UTF-8") { report ->
		report.println "TOTAL: "+ignored.size()
		ignored.eachWithIndex { sign, idx ->
			report.println "\n**DUPLICATE\n "
			report.println "keeped="+dones[sign]
			report.println "duplicates="+ignoreds[idx]
			report.println "SIGN="+sign
			report.println "\n"
		}
	}
	println "TOTAL IGNORED: "+ignored.size()
	println "	see $ignoredFile for text IDs"
}
502

  
503
println "$itext articles found."
// The 'debug' parameter is documented as "keep temporary results":
// the temporary directories are therefore removed only when debug is off.
if (!debug) {
	tmpXhtmlOutput.deleteDir()
	tmpDir.deleteDir()
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/debug/PreferencesMacro.groovy (revision 1543)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.debug
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.txm.core.preferences.TXMPreferences
8
import org.txm.searchengine.cqp.CQPPreferences
9

  
10
//org.txm.core.preferences.TXMPreferences.dump();
11

  
12
println TXMPreferences.getString(CQPPreferences.CQI_SERVER_PATH_TO_CQPLIB, CQPPreferences.PREFERENCES_NODE);
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/PlotEllipsesMacro.groovy (revision 1543)
1
// STANDARD DECLARATIONS
2
package org.txm.macroproto
3

  
4
import org.kohsuke.args4j.*
5

  
6
import groovy.transform.Field
7

  
8
import org.txm.ca.core.functions.CA
9
import org.txm.rcpapplication.swt.widget.parameters.*
10
import org.txm.statsengine.r.core.RWorkspace
11

  
12
// BEGINNING OF PARAMETERS
13

  
14
if (!(corpusViewSelection instanceof CA)) {
15
	println "Selection is not a CA. Please select a CA result in the Corpus view"
16
	return;
17
}
18

  
19
@Field @Option(name="outputFile", usage="an example file", widget="FileSave", required=true, def="file.svg")
20
def outputFile
21

  
22
@Field @Option(name="draw", usage="'row' or 'col'", widget="String", required=true, def="row")
23
def draw
24
// Open the parameters input dialog box
25
if (!ParametersDialog.open(this)) return;
26

  
27
// END OF PARAMETERS
28
// Symbol of the selected CA result, injected in the R commands below
def symbol = corpusViewSelection.getSymbol()

// R commands: plot the CA, then call ellipseCA on the rows or columns chosen with the 'draw' parameter
def rCommands = """
plot($symbol);
ellipseCA($symbol, ellipse=c("$draw"));
"""

// run the commands and save the plot in the output file
RWorkspace.getRWorkspaceInstance().plot(outputFile, rCommands);

println "Done: "+outputFile.getAbsolutePath()
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/stats/PlotEllipsesMacro.groovy (revision 1543)
1
// STANDARD DECLARATIONS
2
package org.txm.macro
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcpapplication.swt.widget.parameters.*
7
import org.txm.functions.ca.CA
8
import org.txm.stat.engine.r.RWorkspace
9
// BEGINNING OF PARAMETERS
10

  
11
if (!(corpusViewSelection instanceof CA)) {
12
	println "selection is not a CA. Please select a CA result in the Corpus view"
13
	return;
14
}
15

  
16
@Field @Option(name="outputFile", usage="an example file", widget="FileSave", required=true, def="file.svg")
17
def outputFile
18

  
19
@Field @Option(name="draw", usage="'row' or 'col'", widget="String", required=true, def="row")
20
def draw
21
// Open the parameters input dialog box
22
if (!ParametersDialog.open(this)) return;
23

  
24
// END OF PARAMETERS
25
// Symbol of the selected CA result, injected in the R commands below
def symbol = corpusViewSelection.getSymbol()

// R commands: plot the CA, then call ellipseCA on the rows or columns chosen with the 'draw' parameter
def rCommands = """
plot($symbol);
ellipseCA($symbol, ellipse=c("$draw"));
"""

// run the commands and save the plot in the output file
RWorkspace.getRWorkspaceInstance().plot(outputFile, rCommands);

println "Done: "+outputFile.getAbsolutePath()
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/ApplyXQueryMacro.groovy (revision 1543)
1
// STANDARD DECLARATIONS
2
package org.txm.macro
3

  
4

  
5
import org.kohsuke.args4j.*
6
import groovy.transform.Field
7
import org.txm.rcpapplication.swt.widget.parameters.*
8
import org.txm.searchengine.cqp.corpus.*
9
import net.sf.saxon.*
10
import net.sf.saxon.query.*
11
import net.sf.saxon.om.*
12
import javax.xml.transform.*
13
import javax.xml.transform.sax.*
14
import javax.xml.transform.stream.*
15
import org.xml.sax.*
16
import javax.xml.xpath.*
17
import net.sf.saxon.event.*
18
import org.w3c.dom.*
19
import net.sf.saxon.s9api.*
20
import javax.xml.parsers.*
21

  
22
// BEGINNING OF PARAMETERS
23

  
24
if (!(corpusViewSelection instanceof Corpus)) {
25
	println "Error: Selection must be a corpus"
26
	return false;
27
}
28

  
29
@Field @Option(name="xqFile", usage="a Xquery file", widget="FileOpen", required=true, def="C:/Temp/foo.xq")
30
def xqFile
31
@Field @Option(name="outFile", usage="optional output file", widget="File", required=false, def="")
32
def outFile
33
@Field @Option(name="debug", usage="an example file", widget="Boolean", required=true, def="false")
34
def debug
35

  
36
// Open the parameters input dialog box
37
if (!ParametersDialog.open(this)) return;
38

  
39
// END OF PARAMETERS
40

  
41
println "corpora selection: "+corpusViewSelection
// Only files with the '.xq' extension are accepted. The message now interpolates
// the declared 'xqFile' parameter (the previous '$xdFile' was not defined anywhere).
if (!xqFile.getName().endsWith(".xq")) {
	println "Error: Xquery selected file is not a '.xq' file: $xqFile"
	return false;
}
46

  
47
MainCorpus mainCorpus = ((Corpus)corpusViewSelection).getMainCorpus();
48
File binDir = mainCorpus.getBaseDirectory();
49
File txmDir = new File(binDir, "txm/"+mainCorpus.getName());
50

  
51
if (!txmDir.exists()) {
52
	println "Error: the 'txm' directory does not exist: $txmDir"
53
	return false;
54
}
55

  
56
def xmlFiles = txmDir.listFiles();
57
if (xmlFiles == null || xmlFiles.size() == 0) {
58
	println "Error: no file found in $txmDir"
59
	return false;
60
}
61

  
62
String query = """<matches>
63
  { 
64
    for \$t in fn:collection('$txmDir')
65
      for \$w in \$t//tei:w
66
        let \$pos := \$w/txm:ana[@type="#frpos"]/text()
67
        return <match>{\$w/@id}</match>
68
  }
69
</matches>
70
"""
71

  
72
Processor processor = new Processor(false)
73
XQueryCompiler xqc = processor.newXQueryCompiler()
74
xqc.declareNamespace("tei", "http://www.tei-c.org/ns/1.0")
75
xqc.declareNamespace("txm", "http://textometrie.org/1.0")
76
xqc.declareNamespace("fn", "http://www.w3.org/2005/xpath-functions")
77
XQueryExecutable exp = xqc.compile(query)
78

  
79
DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
80
dfactory.setNamespaceAware(true);
81
Document dom = dfactory.newDocumentBuilder().newDocument();
82
exp.load().run(new DOMDestination(dom));
83
if (outFile instanceof File && outFile.getName().length() > 0) {
84
	def writer = outFile.newWriter("UTF-8")
85
	writer.println dom.getDocumentElement()
86
	writer.close()
87
	println "Result written in "+outFile.getAbsolutePath()
88
} else {
89
	println dom.getDocumentElement()
90
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/XSL2CQPMacro.groovy (revision 1543)
1
// STANDARD DECLARATIONS
2
package org.txm.macro
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcpapplication.swt.widget.parameters.*
7
import org.txm.searchengine.cqp.corpus.*
8
import org.txm.importer.ApplyXsl2;
9
import groovy.util.XmlParser
10

  
11
// BEGINNING OF PARAMETERS
12

  
13
if (!(corpusViewSelection instanceof Corpus)) {
14
	println "Error: Selection must be a corpus"
15
	return false;
16
}
17

  
18
@Field @Option(name="xslFile", usage="an example file", widget="FileOpen", required=true, def="C:/Temp/foo.txt")
19
def xslFile
20
@Field @Option(name="debug", usage="an example file", widget="Boolean", required=true, def="false")
21
def debug
22

  
23
// Open the parameters input dialog box
24
if (!ParametersDialog.open(this)) return;
25

  
26
// END OF PARAMETERS
27

  
28
println "corpora selection: "+corpusViewSelection
29
if (!xslFile.getName().endsWith(".xsl")) {
30
	println "Error: XSL selected file is not a '.xsl' file: $xslFile"
31
	return false;
32
}
33

  
34
MainCorpus mainCorpus = ((Corpus)corpusViewSelection).getMainCorpus();
35
File binDir = mainCorpus.getBaseDirectory();
36
File txmDir = new File(binDir, "txm/"+mainCorpus.getName());
37
File resultsDir = new File(binDir, "results");
38
resultsDir.mkdirs()
39

  
40
if (!txmDir.exists()) {
41
	println "Error: the 'txm' directory does not exist: $txmDir"
42
	return false;
43
}
44

  
45
def xmlFiles = txmDir.listFiles();
46
if (xmlFiles == null || xmlFiles.size() == 0) {
47
	println "Error: no file found in $txmDir"
48
	return false;
49
}
50

  
51
HashSet<List<String>> allmatches = new HashSet<String>();
52
ApplyXsl2 a = new ApplyXsl2(xslFile.getAbsolutePath());
53
println "Querying..."
54
for (File xmlFile : xmlFiles) {
55
	println "	"+xmlFile.getName()
56
	File resultFile = new File(resultsDir, "xslqueryresult_"+xmlFile.getName());
57
	a.process(xmlFile, resultFile);
58
	
59
	def matches = new XmlParser().parse(resultFile)
60
	matches.match.each() { match ->
61
		def l = [];
62
		match.wRef.each() { l.add(it.attribute("id")); }
63
		allmatches << l
64
	}
65
	if (!debug) resultFile.delete()
66
}
67

  
68
//println "Matches: "
69
//for (def m : allmatches) println " "+m
70

  
71
// Turn each match (the list of its word ids) into a CQL sub-query anchored on the
// first word id and followed by as many unconstrained tokens as the match has other words
def subqueries = []
for (def m : allmatches) {
	int n = m.size() // method call: 'm.size' would be a property access spread over the list elements
	if (n == 0) continue // a match without any word reference cannot be anchored
	def anchor = "[id=\""+m[0]+"\"]"
	if (n == 1) subqueries <<  anchor
	else if (n == 2)
		subqueries <<  anchor+"[]"
	else
		subqueries << anchor+"[]{"+(n-1)+"}"
}
def query = subqueries.join("|")
def initialquery = query
// Too long queries are cut at their first alternative
while (query.length() > 1500) {
	int firstBar = query.indexOf("|")
	if (firstBar < 0) break // single alternative left: nothing more to cut
	def oldquery = query
	query = query.substring(0, firstBar)
	println "Warning : query has been truncated: "+oldquery
}
86

  
87
println "CQL: $query"
88
if (debug) println "See debug files in: "+resultsDir
89
if (initialquery != query) println "Initial CQL: $initialquery"
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/partition/PartsSizeMacro.groovy (revision 1543)
1
// STANDARD DECLARATIONS
2
package org.txm.macro
3

  
4
import org.txm.searchengine.cqp.corpus.Partition;
5

  
6
// The macro requires the current selection to be a Partition
if (!(corpusViewSelection instanceof Partition)) {
	println "Select a partition before calling this macro."
	return;
}

// one line per part: name, tabulation, size
for (def part : ((Partition) corpusViewSelection).getParts()) {
	println "${part.getName()}\t${part.getSize()}"
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/file/DirectoryInfoMacro.groovy (revision 1543)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import java.nio.file.Path
7
import java.nio.file.attribute.FileOwnerAttributeView
8
import java.nio.file.attribute.UserPrincipal
9
import org.txm.rcpapplication.swt.widget.parameters.*
10
import org.txm.Toolbox
11
import org.txm.searchengine.cqp.*
12
import java.io.IOException;
13
import java.nio.file.*
14
import java.nio.file.attribute.*;
15

  
16
// Parameter declaration - Déclaration du paramètre
17
@Field @Option(name="directory", usage="the directory to diagnose", widget="Folder", required=true, def="set da enpos;")
18
		File directory;
19

  
20
// Parameters settings UI
21
// Stop when the parameters dialog does not return successfully.
// The message names this macro (it previously named ExecCQLMacro).
if (!ParametersDialog.open(this)) {
	println("** DirectoryInfoMacro error: Impossible to open Parameters settings UI dialog box.")
	return
}
25

  
26
// Basic java.io.File diagnostics of the selected directory
println "full path="+directory.getAbsolutePath()
println " exists? "+directory.exists()
println " read? "+directory.canRead()
println " write? "+directory.canWrite()
println " executable? "+directory.canExecute()
println " hidden? "+directory.isHidden()
// listFiles() returns null when the path is not a directory or cannot be listed:
// report it instead of failing, since this macro is meant to diagnose such cases
def files = directory.listFiles()
println " number of files? "+(files == null ? "N/A (not a directory or not listable)" : files.size())
34

  
35

  
36
Path path = Paths.get(directory.getAbsolutePath());
37

  
38
FileOwnerAttributeView ownerAttributeView = Files.getFileAttributeView(path, FileOwnerAttributeView.class);
39
if (ownerAttributeView != null) {
40
	UserPrincipal owner = ownerAttributeView.getOwner();
41
	if (owner != null) println " file owner attribute: "+owner.getName()
42
}
43

  
44
// ACL entries, when the file system provides an ACL attribute view for the path
AclFileAttributeView aclAttributeView = Files.getFileAttributeView(path, AclFileAttributeView.class);
if (aclAttributeView != null) {
	List<AclEntry> acl = aclAttributeView.getAcl();
	if (acl != null) {
		for (AclEntry entry : acl)
			// test the entry itself (the list was already tested above)
			if (entry != null) println " acl entry: "+entry
	}
}
52

  
53
BasicFileAttributeView basicAttributeView = Files.getFileAttributeView(path, BasicFileAttributeView.class);
54
if (basicAttributeView != null) {
55
	BasicFileAttributes attributes = basicAttributeView.readAttributes()
56
	if (attributes != null)  {
57
		println " basic attributes: creation time: "+attributes.creationTime()
58
		println " basic attributes: last access time: "+attributes.lastAccessTime()
59
		println " basic attributes: last modification time: "+attributes.lastModifiedTime()
60
		println " basic attributes: file key: "+attributes.fileKey()
61
		println " basic attributes: directory file?: "+attributes.isDirectory()
62
		println " basic attributes: symbolic link?: "+attributes.isSymbolicLink()
63
		println " basic attributes: regular file?: "+attributes.isRegularFile()
64
	}
65
}
66

  
67
DosFileAttributeView dosAttributeView = Files.getFileAttributeView(path, DosFileAttributeView.class);
68
if (dosAttributeView != null) {
69
	DosFileAttributes attributes = dosAttributeView.readAttributes()
70
	if (attributes != null) {
71
		println " dos attributes: creation time: "+attributes.creationTime()
72
		println " dos attributes: last access time: "+attributes.lastAccessTime()
73
		println " dos attributes: last modification time: "+attributes.lastModifiedTime()
74
		println " dos attributes: file key: "+attributes.fileKey()
75
		println " dos attributes: directory file?: "+attributes.isDirectory()
76
		println " dos attributes: symbolic link?: "+attributes.isSymbolicLink()
77
		println " dos attributes: regular file?: "+attributes.isRegularFile()
78
		println " dos attributes: archive file?: "+attributes.isArchive()
79
		println " dos attributes: system file?: "+attributes.isSystem()
80
	}
81
}
82

  
83
PosixFileAttributeView posixAttributeView = Files.getFileAttributeView(path, PosixFileAttributeView.class);
84
if (posixAttributeView != null) {
85
	PosixFileAttributes attributes = posixAttributeView.readAttributes()
86
	if (attributes != null) {
87
		println " posix attributes: group: "+attributes.group()
88
		println " posix attributes: creation time: "+attributes.creationTime()
89
		println " posix attributes: last access time: "+attributes.lastAccessTime()
90
		println " posix attributes: last modification time: "+attributes.lastModifiedTime()
91
		println " posix attributes: file key: "+attributes.fileKey()
92
		println " posix attributes: directory file?: "+attributes.isDirectory()
93
		println " posix attributes: symbolic link?: "+attributes.isSymbolicLink()
94
		println " posix attributes: regular file?: "+attributes.isRegularFile()
95
		println " posix attributes: permissions: "+attributes.permissions().sort()
96
	}
97
}
98

  
99
UserDefinedFileAttributeView userdefinedAttributeView = Files.getFileAttributeView(path, UserDefinedFileAttributeView.class);
100
if (userdefinedAttributeView != null) {
101
	def attributes = userdefinedAttributeView.list()
102
	if (attributes != null) {
103
		for (def entry : attributes)
104
			println " user defined attributes: "+entry
105
	}
106
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/file/SetFileRightsMacro.groovy (revision 1543)
1
// STANDARD DECLARATIONS
2
package org.txm.macro
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcpapplication.swt.widget.parameters.*
7

  
8
// BEGINNING OF PARAMETERS
9

  
10
@Field @Option(name="file", usage="an example file", widget="FileOpen", required=true, def="C:/Temp/foo.txt")
11
def file
12

  
13
@Field @Option(name="read_right", usage="read", widget="Boolean", required=true, def="true")
14
def read_right
15
@Field @Option(name="write_right", usage="write", widget="Boolean", required=true, def="true")
16
def write_right
17
@Field @Option(name="execute_right", usage="execute", widget="Boolean", required=true, def="true")
18
def execute_right
19

  
20
// Passed as the second argument of File.setReadable/setWritable/setExecutable below:
// true restricts the change to the file owner, false applies it to everybody.
@Field @Option(name="current_user_only", usage="apply the rights to the file owner only (otherwise to everybody)", widget="Boolean", required=true, def="false")
def current_user_only
22

  
23
// Open the parameters input dialog box
24
if (!ParametersDialog.open(this)) return;
25

  
26
// END OF PARAMETERS
27

  
28
// Apply the requested read/write/execute rights to the selected file.
if (file.exists()) {
	// Each java.io.File setter returns false when the right could not be changed
	// (insufficient privileges, or a file system that does not support the right),
	// so collect the outcomes instead of discarding them.
	def outcomes = [
		read   : file.setReadable(read_right, current_user_only),
		write  : file.setWritable(write_right, current_user_only),
		execute: file.setExecutable(execute_right, current_user_only)
	]
	outcomes.each { right, applied ->
		if (!applied) println "Warning: could not set the $right right of $file"
	}
} else {
	println "Error: file not found $file"
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/ExecPythonMacro.groovy (revision 1543)
1
// STANDARD DECLARATIONS
2
package org.txm.macro
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcpapplication.swt.widget.parameters.*
7

  
8
// BEGINNING OF PARAMETERS
9

  
10
// Declare each parameter here
11
// (available widget types: Query, File, Folder, String, Text, Boolean, Integer, Float and Date)
12

  
13
@Field @Option(name="pythonFile", usage="an example file", widget="FileOpen", required=true, def="script.py")
14
def pythonFile
15

  
16
// Parameters settings UI
17
// Stop the macro when the parameters dialog did not return successfully.
if (!ParametersDialog.open(this)) {
	// The message names this macro (it previously said "ExecCQLMacro").
	println("** ExecPythonMacro error: Impossible to open Parameters settings UI dialog box.")
	return
}
21

  
22
// Run the script with the "python" executable found on the PATH.
// The command is given as a list so that a script path containing spaces
// is passed as one single argument instead of being split on whitespace.
def process = ["python", pythonFile.toString()].execute()

// Drain stdout and stderr while waiting for the process to end: an unread
// stderr can block the child process, and exitValue() throws if it is
// called before the process has terminated.
def standardOutput = new StringBuilder()
def errorOutput = new StringBuilder()
process.waitForProcessOutput(standardOutput, errorOutput)
standardOutput.toString().eachLine { println it }

def exitValue = process.exitValue()
if (exitValue != 0) {
	println "Error during execution: $exitValue"
	// Show what the script wrote on its error stream to help diagnose the failure.
	errorOutput.toString().eachLine { println it }
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ExecXSLDOMMacro.groovy (revision 1543)
1
package org.txm.macro.xml;
2
// STANDARD DECLARATIONS
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcpapplication.swt.widget.parameters.*
7
import org.txm.importer.ApplyXsl2;
8
import javax.xml.transform.stream.*
9
import javax.xml.transform.dom.DOMResult
10
import org.w3c.dom.*
11

  
12

  
13
// BEGINNING OF PARAMETERS
14
@Field @Option(name="XSLFile", usage="an example file", widget="File", required=true, def="file.xsl")
15
def XSLFile = new File(System.getProperty("user.home"),"TXM/xsl/identity.xsl")
16

  
17
@Field @Option(name="intputDirectory", usage="an example folder", widget="Folder", required=true, def="in")
18
def intputDirectory = new File(System.getProperty("user.home"),"xml/TESTS2/xml")
19

  
20
//@Field @Option(name="parameters", usage="an example folder", widget="Text", required=false, def="")
21
def parameters = [:]
22

  
23
@Field @Option(name="dom", usage="XSLT Result is - true:  a DOM Element. false - a XSLT Result is XMLStreamReader", widget="Boolean", required=true, def="true")
24
def dom
25

  
26
@Field @Option(name="debug", usage="Show debug messages, value = true|false", widget="Boolean", required=true, def="false")
27
def debug
28

  
29
if (!ParametersDialog.open(this)) return;
30
// END OF PARAMETERS
31

  
32
// USER MANIPULATIONS
33

  
34
/**
 * User hook called for each transformed file when the "dom" option is true.
 * Prints the input file name followed by the tag name of the result element.
 *
 * @param inputXMLFile the XML file that was transformed
 * @param resultnode the root element of the result, see
 *        https://docs.oracle.com/javase/8/docs/api/org/w3c/dom/Element.html
 */
def processDOMResult(File inputXMLFile, def resultnode) {
	println inputXMLFile.getName()+" -> "+ resultnode.getTagName()
}

/**
 * User hook called for each transformed file when the "dom" option is false.
 * The main loop calls this method, but the script did not define it, so every
 * file ended in a "failed" warning in that mode.
 * Prints the input file name followed by the class name of the result.
 *
 * @param inputXMLFile the XML file that was transformed
 * @param result whatever process() returned for this file
 */
def processSaxResult(File inputXMLFile, def result) {
	println inputXMLFile.getName()+" -> "+ result?.getClass()?.getSimpleName()
}
38

  
39
// END USER MANIPULATIONS
40

  
41
println "Use XSL $XSLFile with parameters $parameters"
42
println "Processed directory: $intputDirectory"
43

  
44
// Apply the stylesheet to every *.xml / *.XML file of the input directory and
// hand each result to the matching user hook. A failure on one file is reported
// and does not stop the processing of the other files.
def files = []
ApplyXsl2 a = new ApplyXsl2(XSLFile.getAbsolutePath());
intputDirectory.eachFileMatch(~/.+\.(xml|XML)/) { XMLFile ->
	String name = XMLFile.getName()
	try {
		// Pass the parameters announced in the header message (an empty map was passed before).
		def result = process(a, XMLFile, parameters);
		if (!result) {
			// process() returns false instead of a result when a parameter is rejected
			println "Warning: XSL transformation of '$name' was skipped: no result returned"
			return // next file
		}
		if (dom) processDOMResult(XMLFile, result.getNode().getDocumentElement());
		else processSaxResult(XMLFile, result);
		files << XMLFile
	} catch (Exception e) {
		println "Warning: XSL transformation of '$name' failed with error=$e"
		if (debug) e.printStackTrace();
	}
}
58

  
59
/**
 * Applies the stylesheet held by the given processor to one XML file.
 *
 * @param a the XSL processor; its transformer is used for the transformation
 * @param inputXMLFile the XML file to transform
 * @param args the XSL parameters, as a map of parameter name to value
 * @return a DOMResult when the "dom" option is true, otherwise a StreamResult
 *         backed by an in-memory StringWriter; false when a parameter could
 *         not be set
 * @throws Exception when the transformation fails
 */
def process(ApplyXsl2 a, File inputXMLFile, def args) throws Exception {
	for (String k : args.keySet()) {
		// The parameters belong to the processor: the script has no setParam method.
		// NOTE(review): assumes ApplyXsl2.setParam(name, value) returns a boolean - confirm.
		if (!a.setParam(k, args[k])) {
			a.resetParams() // do not leak the parameters already set into the next file
			return false;
		}
	}

	def result = null;
	if (dom) {
		result = new DOMResult();
	} else {
		// The previous branch could not compile (unbalanced parenthesis, instantiation
		// of the XMLStreamReader interface, undefined output file). The serialized
		// result is kept in memory; read it with result.getWriter().toString().
		result = new StreamResult(new StringWriter());
	}
	a.transformer.transform(new StreamSource(inputXMLFile), result);
	a.cleanMemory(); // save memory
	a.resetParams()
	return result;
}

Formats disponibles : Unified diff