Statistics
| Revision:

root / tmp / org.txm.tigersearch.rcp / src / org / txm / importer / srcmf / TigerSubcorpus2Main.groovy @ 458

History | View | Annotate | Download (1.4 kB)

/*
Combines subcorpus files to give a single main file.
 */
4
// TXM package statement
5
package org.txm.importer.srcmf
6

    
7
// Command-line entry point
8

    
9
// Parses the command line, validates that exactly one TIGER master file was
// given, and merges its external subcorpora into the chosen output file.
def cli = new CliBuilder(
    usage:'TigerSubcorpus2Main.groovy [options] tiger_master.xml'
)
cli.h(longOpt:'help', 'Prints this message.')
cli.o(longOpt:'output', args:1, argName:'outputfile.xml', 'Output to given file.')
options = cli.parse(args)
if (options == null) {
    // parse() returns null on a malformed command line; it has already
    // reported the problem, so only the exit status is left to set.
    System.exit(2)
}
if (options.h) {
    // Honour the declared -h/--help option instead of silently ignoring it.
    cli.usage()
    return
}
if (options.arguments().size() != 1) {
    println 'Incorrect number of command line arguments... exiting'
    // usage() prints the message itself and returns nothing; wrapping it in
    // println would emit an extra "null" line.
    cli.usage()
    System.exit(2)
}
def tigerMaster = new File(options.arguments()[0])
// Default output name, used when -o/--output is not supplied.
def outputFile = new File('outputfile.xml')
if (options.o) {
    outputFile = new File(options.o)
}
script(tigerMaster, outputFile)
26

    
27
/**
 * Merges the external subcorpus files referenced by a TIGER master file into
 * a single document and writes it to outputFile as UTF-8.
 *
 * Every <subcorpus external="..."> element that is a direct child of a <body>
 * element is replaced, at its original position, by the children of the root
 * element of the referenced file. References are resolved relative to the
 * directory containing tigerMaster; a leading "file:" prefix is stripped.
 * <subcorpus> elements without an "external" attribute are left untouched.
 *
 * @param tigerMaster the TIGER master XML file to read
 * @param outputFile  the file that receives the merged document
 */
def script (File tigerMaster, File outputFile) {
    def masterDoc = new XmlParser().parse(tigerMaster)
    // getParentFile() is null for a bare relative file name, so resolve the
    // master file to an absolute path before taking its directory.
    File baseDir = tigerMaster.getAbsoluteFile().getParentFile()
    for (def body : masterDoc.body) {
        def siblings = body.children()
        // body.subcorpus is a separate list of matches, so the child list
        // can be modified safely while iterating over it.
        for (def subNode : body.subcorpus) {
            def external = subNode.@external
            if (external == null) {
                // No external reference: nothing to import for this element.
                continue
            }
            String relativePath = external.toString()
            if (relativePath.startsWith('file:')) {
                relativePath = relativePath.substring('file:'.length())
            }
            File subCorpusFile = new File(baseDir, relativePath)
            def subDoc = new XmlParser().parse(subCorpusFile)
            // Splice the imported children in where the reference stood so
            // that document order is preserved.
            int position = siblings.indexOf(subNode)
            siblings.subList(position, position + 1).clear()
            siblings.addAll(position, subDoc.children())
        }
    }
    def strWriter = new StringWriter()
    new groovy.util.XmlNodePrinter(new PrintWriter(strWriter)).print(masterDoc)
    def rez = strWriter.toString()
    outputFile.withWriter("UTF-8") { writer ->
        // XmlNodePrinter does not emit an XML declaration, so add one that
        // matches the encoding used for the file.
        writer.println('<?xml version="1.0" encoding="UTF-8"?>')
        writer.print(rez)
    }
}