Statistics
| Revision:

root / tmp / org.txm.tigersearch.rcp / groovy / org / txm / scripts / importer / srcmf / TigerSubcorpus2Main.groovy @ 1000

History | View | Annotate | Download (1.4 kB)

1 911 mdecorde
/*
2 911 mdecorde
Combines subcorpus files to give a single main file.
3 911 mdecorde
 */
4 911 mdecorde
// TXM package statement
5 1000 mdecorde
package org.txm.scripts.importer.srcmf
6 911 mdecorde
7 911 mdecorde
// Command-line entry point
8 911 mdecorde
9 911 mdecorde
// Command-line driver: parses the options, validates the single positional
// argument (the TIGER master file) and delegates the merge to script().
def cli = new CliBuilder(
    usage:'TigerSubcorpus2Main.groovy [options] tiger_master.xml'
)
cli.h(longOpt:'help', 'Prints this message.')
cli.o(longOpt:'output', args:1, argName:'outputfile.xml', 'Output to given file.')
// Left undeclared on purpose so that, as before, it is stored in the script binding.
options = cli.parse(args)
// CliBuilder.parse() returns null (after reporting the problem itself) when the
// command line cannot be parsed; dereferencing it would throw a NullPointerException.
if (options == null) {
    System.exit(2)
}
// Honour the declared -h/--help option before validating positional arguments.
if (options.h) {
    cli.usage()
    System.exit(0)
}
if (options.arguments().size() != 1) {
    println 'Incorrect number of command line arguments... exiting'
    // usage() prints the help text itself and returns nothing, so it must not be
    // wrapped in println (that would emit an extra "null" line).
    cli.usage()
    System.exit(2)
}
def tigerMaster = new File(options.arguments()[0])
// Fall back to the default output name when -o/--output was not supplied.
def outputFile = new File(options.o ?: 'outputfile.xml')
script(tigerMaster, outputFile)
26 911 mdecorde
27 911 mdecorde
/**
 * Merges the external subcorpus files referenced by a TIGER master file into a
 * single document and writes it to outputFile.
 *
 * Every <subcorpus> child of a <body> element that carries an "external"
 * attribute is removed, and the children of the referenced file's root element
 * are appended to that <body>. <subcorpus> elements without an "external"
 * attribute are left in place.
 *
 * @param tigerMaster the master XML file; external references are resolved
 *                    relative to the directory containing it
 * @param outputFile  the file that receives the merged document, written as
 *                    UTF-8 with an XML declaration
 */
def script (File tigerMaster, File outputFile) {
    final String filePrefix = "file:"
    def masterDoc = new XmlParser().parse(tigerMaster)
    // getParentFile() is null for a bare relative name such as "tiger_master.xml";
    // going through the absolute path always yields the containing directory.
    File baseDir = tigerMaster.getAbsoluteFile().getParentFile()
    for (def body : masterDoc.body) {
        // body.subcorpus is evaluated once, before the loop body mutates the
        // children of body, so removing/appending below does not disturb iteration.
        for (def subNode : body.subcorpus) {
            def external = subNode.@external
            if (external == null) {
                // Nothing to inline for a subcorpus without an external reference.
                continue
            }
            String reference = "" + external
            // Strip the "file:" scheme only when it is actually present, instead
            // of blindly dropping the first five characters.
            String relativePath = reference.startsWith(filePrefix) ?
                    reference.substring(filePrefix.length()) : reference
            File subCorpusFile = new File(baseDir, relativePath)
            def subDoc = new XmlParser().parse(subCorpusFile)
            body.remove(subNode)
            for (def child : subDoc.children()) {
                body.append(child)
            }
        }
    }
    // Serialize the merged tree to a string first, then write it out behind an
    // explicit XML declaration.
    def strWriter = new StringWriter()
    new groovy.util.XmlNodePrinter(new PrintWriter(strWriter)).print(masterDoc)
    def rez = strWriter.toString()
    outputFile.withWriter("UTF-8") { writer ->
        writer.println('<?xml version="1.0" encoding="UTF-8"?>')
        writer.print(rez)
    }
}