Révision 3243

tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ForceWordIDsMacro.groovy (revision 3243)
1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License version 3 or any later version (https://www.gnu.org/licenses/gpl.html)
3
// @author mdecorde
4
// @author sheiden
5

  
6
// STANDARD DECLARATIONS
7
package org.txm.macro.xml
8

  
9
import org.txm.utils.FileUtils;
10
import org.kohsuke.args4j.*
11
import groovy.transform.Field
12
import org.txm.rcpapplication.swt.widget.parameters.*
13
import org.txm.importer.StaxIdentityParser
14
import javax.xml.stream.XMLStreamException
15

  
16
// Macro: copies an XML file while forcing the 'id' attribute of every <w> and <pc>
// element to "w_<textid>_<n>", where n is a counter starting at 1. An existing id
// value is backed up in a 'foreign-id' attribute.

// Macro parameters (filled in through the parameters dialog opened below).
@Field @Option(name="inputFile", usage="input file (XML)", widget="FileOpen", required=true, def="input.xml")
def inputFile

@Field @Option(name="outputFile", usage="output file", widget="FileSave", required=true, def="output.xml")
def outputFile

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

if (!(inputFile.exists() && inputFile.canRead())) {
	println "** ForceWordIDs: cannot find $inputFile"
	return false
}

print "Processing "+inputFile+"..."

// make sure the directory receiving the output file exists
outputFile.getParentFile().mkdirs()

// build text id
textid = FileUtils.stripExtension(inputFile)

def parser = new StaxIdentityParser(inputFile) {
	
	// number given to the next word id
	int wordnumber = 1
	
	/* does not work
	protected void writeStartElement() throws XMLStreamException {
		println localname
		if (localname ==~ "w|pc") { // a word
			localname = "w"
		} 
		
		super.writeStartElement()
	}
	*/
	
	/**
	 * Writes the attributes of the current element.
	 * For elements named "w" or "pc", the 'id' attribute is replaced by a generated id,
	 * its previous value is saved in 'foreign-id', any incoming 'foreign-id' is dropped,
	 * and an 'id' is created when the element had none. Other elements are delegated
	 * to the super implementation.
	 *
	 * NOTE(review): inside this class body 'parser' is used as the attribute source
	 * (getAttributeCount, getAttributeLocalName, ...); presumably it is the reader
	 * inherited from StaxIdentityParser, not the script variable being declared - confirm.
	 */
	protected void writeAttributes() throws XMLStreamException {
		if (localname ==~ "w|pc") { 																								// a word
			boolean idwritten = false
			// declared locally, as in ForceWordIDParser, instead of being assigned as an undeclared name
			def attCount = parser.getAttributeCount()
			attCount.times { i ->
				if (parser.getAttributeLocalName(i) == "id") { // update & backup
					writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), "w_"+textid+"_"+(wordnumber++)) 	// force id
					writeAttribute(parser.getAttributePrefix(i), "foreign-id", parser.getAttributeValue(i)) 						// backup
					idwritten = true
				} else if (parser.getAttributeLocalName(i) != "foreign-id") {
					// every other attribute is copied unchanged
					writeAttribute(parser.getAttributePrefix(i), parser.getAttributeLocalName(i), parser.getAttributeValue(i))
				}
			}
			
			if (!idwritten) { 																										// create id
				writeAttribute(null, "id", "w_"+textid+"_"+(wordnumber++))
			}
		} else {
			super.writeAttributes()
		}
	}
}

res = parser.process(outputFile)

println " done."

// the macro result is whatever the parser reported
return res
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ForceWordIDParser.groovy (revision 3243)
1
// Copyright © 2021 MY_INSTITUTION
2
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html)
3
// @author mdecorde
4

  
5
// STANDARD DECLARATIONS
6
package org.txm.macro.xml
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.importer.StaxIdentityParser
12
import javax.xml.stream.XMLStreamException
13

  
14
/**
 * StaxIdentityParser specialisation that rewrites the attributes of word elements.
 *
 * For every element whose local name fully matches the 'wordElement' regex:
 * - an existing 'id' attribute receives the value "w_" + textid + "_" + wordnumber,
 *   and its previous value is written to the attribute named by 'foreingIDAttribute';
 * - an incoming attribute already named 'foreingIDAttribute' is not copied;
 * - when no 'id' attribute was found, one is added.
 * All other elements are handled by the super implementation.
 */
class ForceWordIDParser extends StaxIdentityParser {
	/** Number used for the next generated id; incremented after each use. */
	public int wordnumber = 1
	/** Regex matched against the element local name to detect words. */
	String wordElement;
	/** Name of the attribute receiving the original id value. */
	String foreingIDAttribute;
	/** Text identifier inserted in every generated id. */
	String textid
	
	/**
	 * @param xmlFile the XML file handed to the StaxIdentityParser constructor
	 * @param wordElement regex of the word element names
	 * @param foreingIDAttribute name of the backup attribute for the previous id
	 * @param textid identifier used to build the new ids
	 */
	public ForceWordIDParser(File xmlFile, String wordElement, String foreingIDAttribute, String textid) {
		super(xmlFile);
		this.textid = textid
		this.foreingIDAttribute = foreingIDAttribute
		this.wordElement = wordElement
	}
	
	/** Builds the next word id and advances the counter. */
	private String nextWordId() {
		return "w_"+textid+"_"+(wordnumber++)
	}
	
	/**
	 * Writes the attributes of the current element, forcing the id of word elements.
	 */
	protected void writeAttributes() throws XMLStreamException {
		// not a word: keep the default behaviour
		if (!(localname ==~ wordElement)) {
			super.writeAttributes()
			return
		}
		
		boolean idFound = false
		def total = parser.getAttributeCount()
		for (int idx = 0; idx < total; idx++) {
			def prefix = parser.getAttributePrefix(idx)
			def name = parser.getAttributeLocalName(idx)
			
			if (name == "id") {
				// force the id, then back up the previous value
				writeAttribute(prefix, name, nextWordId())
				writeAttribute(prefix, foreingIDAttribute, parser.getAttributeValue(idx))
				idFound = true
				continue
			}
			
			// an incoming backup attribute is skipped, anything else is copied as is
			if (name != foreingIDAttribute) {
				writeAttribute(prefix, name, parser.getAttributeValue(idx))
			}
		}
		
		// the element had no id: create one
		if (!idFound) {
			writeAttribute(null, "id", nextWordId())
		}
	}

}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ForceWordIDDirMacro.groovy (revision 3243)
1
// Copyright © 2019,2021 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License version 3 or any later version (https://www.gnu.org/licenses/gpl.html)
3
// @author mdecorde
4
// @author sheiden
5

  
6
// STANDARD DECLARATIONS
7
package org.txm.macro.xml
8

  
9
import org.txm.utils.FileUtils;
10
import org.kohsuke.args4j.*
11
import groovy.transform.Field
12
import org.txm.rcpapplication.swt.widget.parameters.*
13
import org.txm.importer.StaxIdentityParser
14
import javax.xml.stream.XMLStreamException
15

  
16
// Macro: runs ForceWordIDFileMacro on every .xml file of a directory, writing each
// result under the same file name in the output directory.

// Macro parameters (filled in through the parameters dialog opened below).
@Field @Option(name="inputDirectory", usage="input directory (.xml files)", widget="Folder", required=true, def="")
def inputDirectory

@Field @Option(name="outputDirectory", usage="output directory", widget="Folder", required=true, def="")
def outputDirectory

@Field @Option(name="wordElement", usage="word element regex", widget="String", required=true, def="word")
def wordElement

@Field @Option(name="foreignIDAttribute", usage="forein id attribute to use if 'id' is already set", widget="String", required=true, def="fn-id")
def foreignIDAttribute

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

if (!inputDirectory.exists()) {
	println "** ForceWordIDDir: no '"+inputDirectory.name+"' directory found. Aborting."
	return false
}

if (!inputDirectory.canRead()) {
	println "** ForceWordIDDir: '"+inputDirectory.name+"' directory not readable. Aborting."
	return false
}

// collect the input files; the dot is escaped so that only names ending with ".xml" match
def f = []
inputDirectory.eachFileMatch(~/.*\.xml/) { f << it }

if (f.size() == 0) {
	println "** ForceWordIDDir: no .xml file found. Aborting."
	return false
}

try {

	// process the files in file name order
	f.sort{ it.name }.each { inputFile ->

		def outputFile = new File(outputDirectory, inputFile.name)

		// NOTE(review): gse, selection(s), corpusViewSelection(s) and monitor are not
		// defined in this script; presumably supplied by the macro runner - confirm.
		res = gse.run(ForceWordIDFileMacro, ["args":[

				"inputFile": inputFile,
				"outputFile": outputFile,
				"wordElement": wordElement,
				"foreignIDAttribute": foreignIDAttribute,

				"selection":selection,
				"selections":selections,
				"corpusViewSelection":corpusViewSelection,
				"corpusViewSelections":corpusViewSelections,
				"monitor":monitor]])

		if (!res) println "** problem calling ForceWordIDFileMacro."
	}

} catch (Exception e) {
	println "** ForceWordIDDir: unable to read input files. Aborting."
	println e.getLocalizedMessage()
	// printStackTrace() returns nothing: call it directly instead of printing its result ("null")
	e.printStackTrace()
	return false
}

return true
81

  
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/ForceWordIDFileMacro.groovy (revision 3243)
1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License version 3 or any later version (https://www.gnu.org/licenses/gpl.html)
3
// @author mdecorde
4
// @author sheiden
5

  
6
// STANDARD DECLARATIONS
7
package org.txm.macro.xml
8

  
9
import org.txm.utils.FileUtils;
10
import org.kohsuke.args4j.*
11
import groovy.transform.Field
12
import org.txm.rcpapplication.swt.widget.parameters.*
13
import org.txm.importer.StaxIdentityParser
14
import javax.xml.stream.XMLStreamException
15

  
16
// Macro: copies an XML file through ForceWordIDParser, which forces the 'id' attribute
// of the elements matching 'wordElement' and backs up a previous id in 'foreignIDAttribute'.

// Macro parameters (filled in through the parameters dialog opened below).
@Field @Option(name="inputFile", usage="input file (.xml)", widget="FileOpen", required=true, def="")
def inputFile

@Field @Option(name="outputFile", usage="output file", widget="FileSave", required=true, def="")
def outputFile

@Field @Option(name="wordElement", usage="word element regex", widget="String", required=true, def="w|pc")
def wordElement

@Field @Option(name="foreignIDAttribute", usage="forein id attribute to use if 'id' is already set", widget="String", required=true, def="fn-id")
def foreignIDAttribute

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// END OF PARAMETERS

if (!(inputFile.exists() && inputFile.canRead())) {
	println "** ForceWordIDFile: cannot find $inputFile"
	return false
}

print "Processing "+inputFile+"..."

// make sure the directory receiving the output file exists
outputFile.getParentFile().mkdirs()

// build text id
textid = FileUtils.stripExtension(inputFile)

ForceWordIDParser parser = new ForceWordIDParser(inputFile, wordElement, foreignIDAttribute, textid);

def res = parser.process(outputFile)

println " Done."

// Return the parser result rather than a constant, so that a caller testing the
// result (ForceWordIDDirMacro does "if (!res)") can detect a failed run.
return res

Formats disponibles : Unified diff