Révision 3243
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ForceWordIDsMacro.groovy (revision 3243) | ||
---|---|---|
1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 or any later version (https://www.gnu.org/licenses/gpl.html) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macro.xml |
|
8 |
|
|
9 |
import org.txm.utils.FileUtils; |
|
10 |
import org.kohsuke.args4j.* |
|
11 |
import groovy.transform.Field |
|
12 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
13 |
import org.txm.importer.StaxIdentityParser |
|
14 |
import javax.xml.stream.XMLStreamException |
|
15 |
|
|
16 |
// Macro parameter: the XML file to read.
@Field @Option(name="inputFile", usage="input file (XML)", widget="FileOpen", required=true, def="input.xml")
def inputFile

// Macro parameter: the file that receives the rewritten XML.
@Field @Option(name="outputFile", usage="output file", widget="FileSave", required=true, def="output.xml")
def outputFile

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

if (!(inputFile.exists() && inputFile.canRead())) {
	println "** ForceWordIDs: cannot find $inputFile"
	return false
}

print "Processing "+inputFile+"..."

// Create the output directory if needed. getParentFile() returns null for a path
// without a parent component, hence the safe navigation.
outputFile.getParentFile()?.mkdirs()

// build text id
// NOTE(review): intentionally left undeclared because the anonymous parser below
// reads it by name; confirm the resolution still works before making it a local.
textid = FileUtils.stripExtension(inputFile)

// Identity parser whose attribute writing is overridden for word elements
// ("w" or "pc"): each one gets a generated id "w_<textid>_<n>", and a
// pre-existing id is saved in a "foreign-id" attribute.
def parser = new StaxIdentityParser(inputFile) {

	// number used for the next generated word id
	int wordnumber = 1

	/* does not work
	 protected void writeStartElement() throws XMLStreamException {
	 println localname
	 if (localname ==~ "w|pc") { // a word
	 localname = "w"
	 }

	 super.writeStartElement()
	 }
	 */

	/**
	 * Writes the attributes of the current element.
	 * For word elements the id is forced and the previous id, if any, is backed up;
	 * all other elements are delegated to the parent implementation.
	 */
	protected void writeAttributes() throws XMLStreamException {
		if (localname ==~ "w|pc") { // a word
			boolean idwritten = false
			// declared locally so it does not leak outside this method
			def attCount = parser.getAttributeCount()
			attCount.times { i ->
				if (parser.getAttributeLocalName(i) == "id") { // update & backup
					writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), "w_"+textid+"_"+(wordnumber++)) // force id
					writeAttribute(parser.getAttributePrefix(i), "foreign-id", parser.getAttributeValue(i)) // backup
					idwritten = true
				} else if (parser.getAttributeLocalName(i) != "foreign-id") {
					// an input "foreign-id" is not copied: it is only written as the backup above
					writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), parser.getAttributeValue(i))
				}
			}

			if (!idwritten) { // create id
				writeAttribute(null, "id", "w_"+textid+"_"+(wordnumber++))
			}
		} else {
			super.writeAttributes()
		}
	}
}

// Run the parser; its result is the return value of the macro.
def res = parser.process(outputFile)

println " done."

return res
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ForceWordIDParser.groovy (revision 3243) | ||
---|---|---|
1 |
// Copyright © 2021 MY_INSTITUTION |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html) |
|
3 |
// @author mdecorde |
|
4 |
|
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.xml |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.importer.StaxIdentityParser |
|
12 |
import javax.xml.stream.XMLStreamException |
|
13 |
|
|
14 |
/**
 * Identity parser that forces the "id" attribute of word elements.
 *
 * Every element whose local name fully matches the wordElement regular
 * expression receives an id of the form "w_<textid>_<n>". When the element
 * already had an "id" attribute, its previous value is written to the
 * attribute named by foreingIDAttribute. Other elements are written by the
 * parent class.
 */
class ForceWordIDParser extends StaxIdentityParser {
	/** Number used for the next generated word id; incremented after each use. */
	public int wordnumber = 1
	/** Regular expression matched against the local name of each element. */
	String wordElement;
	/** Name of the attribute that receives the previous "id" value. */
	String foreingIDAttribute;
	/** Text identifier inserted in every generated word id. */
	String textid

	/**
	 * @param xmlFile the XML file to parse, passed to the parent constructor
	 * @param wordElement regular expression selecting the word elements
	 * @param foreingIDAttribute attribute name used to back up an existing id
	 * @param textid text identifier used in the generated ids
	 */
	public ForceWordIDParser(File xmlFile, String wordElement, String foreingIDAttribute, String textid) {
		super(xmlFile);
		this.textid = textid
		this.foreingIDAttribute = foreingIDAttribute
		this.wordElement = wordElement
	}

	/**
	 * Writes the attributes of the current element, forcing the id of word elements.
	 */
	protected void writeAttributes() throws XMLStreamException {
		// anything that is not a word is handled by the parent class
		if (!(localname ==~ wordElement)) {
			super.writeAttributes()
			return
		}

		boolean hadId = false
		def total = parser.getAttributeCount()
		for (int idx = 0; idx < total; idx++) {
			def attName = parser.getAttributeLocalName(idx)
			if (attName == "id") {
				// replace the id, then keep the previous value under the backup attribute
				writeAttribute(parser.getAttributePrefix(idx), attName, nextWordId())
				writeAttribute(parser.getAttributePrefix(idx), foreingIDAttribute, parser.getAttributeValue(idx))
				hadId = true
			} else if (attName != foreingIDAttribute) {
				// plain copy; an input backup attribute is not copied as is
				writeAttribute(parser.getAttributePrefix(idx), attName, parser.getAttributeValue(idx))
			}
		}

		// the word had no id at all: create one
		if (!hadId) {
			writeAttribute(null, "id", nextWordId())
		}
	}

	/** Builds the next word id and advances the word counter. */
	private String nextWordId() {
		return "w_"+textid+"_"+(wordnumber++)
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ForceWordIDDirMacro.groovy (revision 3243) | ||
---|---|---|
1 |
// Copyright © 2019,2021 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 or any later version (https://www.gnu.org/licenses/gpl.html) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macro.xml |
|
8 |
|
|
9 |
import org.txm.utils.FileUtils; |
|
10 |
import org.kohsuke.args4j.* |
|
11 |
import groovy.transform.Field |
|
12 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
13 |
import org.txm.importer.StaxIdentityParser |
|
14 |
import javax.xml.stream.XMLStreamException |
|
15 |
|
|
16 |
// Macro parameter: directory containing the .xml files to process.
@Field @Option(name="inputDirectory", usage="input directory (.xml files)", widget="Folder", required=true, def="")
def inputDirectory

// Macro parameter: directory receiving one result file per input file (same file name).
@Field @Option(name="outputDirectory", usage="output directory", widget="Folder", required=true, def="")
def outputDirectory

// Macro parameter: regular expression forwarded to ForceWordIDFileMacro.
@Field @Option(name="wordElement", usage="word element regex", widget="String", required=true, def="word")
def wordElement

// Macro parameter: backup attribute name forwarded to ForceWordIDFileMacro.
@Field @Option(name="foreignIDAttribute", usage="forein id attribute to use if 'id' is already set", widget="String", required=true, def="fn-id")
def foreignIDAttribute

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

if (!inputDirectory.exists()) {
	println "** ForceWordIDDir: no '"+inputDirectory.name+"' directory found. Aborting."
	return false
}

if (!inputDirectory.canRead()) {
	println "** ForceWordIDDir: '"+inputDirectory.name+"' directory not readable. Aborting."
	return false
}

// Collect the regular files whose name ends with ".xml". The dot is escaped so
// that names merely ending in "xml" are not picked up, and directories are skipped.
def f = []
inputDirectory.eachFileMatch(groovy.io.FileType.FILES, ~/.*\.xml/) { f << it }

if (f.size() == 0) {
	println "** ForceWordIDDir: no .xml file found. Aborting."
	return false
}

try {

	// Process the files in file name order, one ForceWordIDFileMacro call per file.
	f.sort{ it.name }.each { inputFile ->

		def outputFile = new File(outputDirectory, inputFile.name)

		def res = gse.run(ForceWordIDFileMacro, ["args":[

			"inputFile": inputFile,
			"outputFile": outputFile,
			"wordElement": wordElement,
			"foreignIDAttribute": foreignIDAttribute,

			"selection":selection,
			"selections":selections,
			"corpusViewSelection":corpusViewSelection,
			"corpusViewSelections":corpusViewSelections,
			"monitor":monitor]])

		// a failing file is reported but does not stop the remaining files
		if (!res) println "** problem calling ForceWordIDFileMacro."
	}

} catch (Exception e) {
	println "** ForceWordIDDir: unable to read input files. Aborting."
	println e.getLocalizedMessage()
	// printStackTrace() returns nothing: call it directly instead of printing its result
	e.printStackTrace()
	return false
}

return true
|
81 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ForceWordIDFileMacro.groovy (revision 3243) | ||
---|---|---|
1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 or any later version (https://www.gnu.org/licenses/gpl.html) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macro.xml |
|
8 |
|
|
9 |
import org.txm.utils.FileUtils; |
|
10 |
import org.kohsuke.args4j.* |
|
11 |
import groovy.transform.Field |
|
12 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
13 |
import org.txm.importer.StaxIdentityParser |
|
14 |
import javax.xml.stream.XMLStreamException |
|
15 |
|
|
16 |
// Macro parameter: the XML file to read.
@Field @Option(name="inputFile", usage="input file (.xml)", widget="FileOpen", required=true, def="")
def inputFile

// Macro parameter: the file that receives the rewritten XML.
@Field @Option(name="outputFile", usage="output file", widget="FileSave", required=true, def="")
def outputFile

// Macro parameter: regular expression passed to ForceWordIDParser to select the word elements.
@Field @Option(name="wordElement", usage="word element regex", widget="String", required=true, def="w|pc")
def wordElement

// Macro parameter: attribute name passed to ForceWordIDParser to back up an existing id.
@Field @Option(name="foreignIDAttribute", usage="forein id attribute to use if 'id' is already set", widget="String", required=true, def="fn-id")
def foreignIDAttribute

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

if (!(inputFile.exists() && inputFile.canRead())) {
	println "** ForceWordIDFile: cannot find $inputFile"
	return false
}

print "Processing "+inputFile+"..."

// Create the output directory if needed. getParentFile() returns null for a path
// without a parent component, hence the safe navigation.
outputFile.getParentFile()?.mkdirs()

// build text id
def textid = FileUtils.stripExtension(inputFile)

ForceWordIDParser parser = new ForceWordIDParser(inputFile, wordElement, foreignIDAttribute, textid);

def res = parser.process(outputFile)

println " Done."

// Return the parser result rather than a constant, so that callers testing the
// returned value can detect a failed run.
return res
Formats disponibles : Unified diff