Statistics
| Revision:

root / tmp / org.txm.tigersearch.rcp / bin / org / txm / importer / srcmf / SrcmfImporter.groovy @ 458

History | View | Annotate | Download (2.5 kB)

1
/*
2
 * Calls all import scripts
3
 */
4

    
5
/**
6
 *
7
 * @author tmr
8
 */
9

    
10
// Command line form of import statements:
11
// import MasterReheader
12
// import PunctInjectImport
13
// import SubcorpusDataInject
14
// import TigerSubcorpus2Main
15
// import java.util.logging.FileHandler
16
// import javax.xml.parsers.DocumentBuilderFactory
17

    
18
// TXM package statement
19
package org.txm.importer.srcmf
20

    
21
import java.util.logging.*;
22

    
23

    
24
// Command line entry point
25
// Parses the three positional arguments (TIGER master file, XML-TXM file,
// header file), runs every import step, then deletes the intermediate
// merged file returned by doAllButPnc().
def cli = new CliBuilder(
    usage:'SrcmfImport.groovy [options] tiger_master.xml xml_txm.xml header_file.xml'
)
cli.h(longOpt:'help', 'Prints this message.')
def options = cli.parse(args)
// CliBuilder.parse() returns null (after reporting the problem itself) when
// the command line cannot be parsed; bail out instead of hitting an NPE.
if (options == null) {
    System.exit(2)
}
// Honour the declared -h/--help option.
if (options.h) {
    cli.usage()
    return
}
def positional = options.arguments()
if (positional.size() != 3) {
    println 'Incorrect number of command line arguments... exiting'
    // usage() prints the help text itself and returns nothing, so it must
    // not be wrapped in println (that would print an extra "null").
    cli.usage()
    System.exit(2)
}

def tigerFile = new File(positional[0])
def txmFile = new File(positional[1])
def headerFile = new File(positional[2])
// The working ("bin") directory is the grandparent of the XML-TXM file.
def tigerXmlAll = doAllButPnc(
    tigerFile,
    txmFile,
    headerFile,
    txmFile.getAbsoluteFile().getParentFile().getParentFile()
)
doPnc(tigerXmlAll, txmFile)
// The merged file is only an intermediate product; doPnc() writes its own
// output file next to it.
tigerXmlAll.delete()
47

    
48
/**
 * Runs every import step except the one performed by doPnc().
 *
 * Steps, in order:
 *  1. SubcorpusDataInject.processMaster() writes "master_pos.xml" into
 *     binDir/tiger from the TIGER master file.
 *  2. MasterReheader.script() rewrites that file using headerFile; the result
 *     replaces "master_pos.xml".
 *  3. TigerSubcorpus2Main.script() produces "TigerAll.xml" in the same
 *     directory.
 *
 * @param tigerFile  TIGER master XML file
 * @param txmFile    XML-TXM file; only its parent directory is used here, and
 *                   the first file listed in that directory is handed to
 *                   SubcorpusDataInject
 * @param headerFile header file passed to the reheader step
 * @param binDir     directory that receives the "tiger" sub-directory and
 *                   the "tiger.log" log file
 * @return the "TigerAll.xml" file written by the merge step
 * @throws IllegalStateException if the directory containing txmFile cannot
 *         be listed or is empty
 */
def doAllButPnc(File tigerFile, File txmFile, File headerFile, File binDir) {
    File txmSrcDir = txmFile.getAbsoluteFile().getParentFile()
    File tigerDir = new File(binDir, "tiger")
    tigerDir.mkdir()
    File masterpos = new File(tigerDir, "master_pos.xml")

    // listFiles() returns null for an unreadable/non-directory path and an
    // empty array for an empty directory; fail with a clear message instead
    // of an NPE / ArrayIndexOutOfBoundsException.
    // NOTE(review): listFiles() order is unspecified, so "first file" is
    // arbitrary when the directory holds several files - confirm intent.
    File[] txmSources = txmSrcDir.listFiles()
    if (!txmSources) {
        throw new IllegalStateException("No XML-TXM file found in " + txmSrcDir)
    }
    File xmltxm = txmSources[0]

    File logFile = new File(binDir, "tiger.log")
    def logHandler = new FileHandler(logFile.getAbsolutePath())
    try {
        // Run pos injection script
        def sdi = new SubcorpusDataInject(xmltxm, logHandler, "vers")
        sdi.processMaster(tigerFile, masterpos)

        // Run reheader script
        def reheader = new MasterReheader()
        File tmp = File.createTempFile("tmp", ".xml", tigerDir)
        def feats = ['nt':['cat', 'type', 'coord'], 't':['pos', 'form', 'q']]
        // NOTE(review): 'word' is not among the 't' features listed above -
        // confirm this is intended.
        def firstFeat = ['nt':'cat', 't':'word']
        reheader.script(masterpos, headerFile, tmp, feats, firstFeat)
        // createTempFile() already created an empty file, so existence alone
        // proves nothing; an empty file means the reheader wrote no output.
        if (!tmp.exists() || tmp.length() == 0L) {
            println "Error: reheader failed"
        }
        masterpos.delete()
        // renameTo() reports failure only through its return value.
        if (!tmp.renameTo(masterpos)) {
            println "Error: could not rename " + tmp + " to " + masterpos
        }

        // Run merge master & subcorpus script
        def tigerXmlAll = new File(masterpos.getParentFile(), "TigerAll.xml")
        def mergescript = new TigerSubcorpus2Main()
        mergescript.script(masterpos, tigerXmlAll)
        return tigerXmlAll
    } finally {
        // Release the log file (and its .lck lock file) opened above.
        logHandler.close()
    }
}
78

    
79
/**
 * Runs PunctInjectImport on the merged TIGER file and the XML-TXM file,
 * writing its output to "TigerPnc.xml" in the same directory as tigerXmlAll.
 *
 * @param tigerXmlAll merged TIGER file (as returned by doAllButPnc)
 * @param txmFile     XML-TXM file passed to PunctInjectImport
 * @return whatever PunctInjectImport.process() returns
 */
def doPnc(File tigerXmlAll, File txmFile) {
    // Declared locally so the injector no longer leaks into the script binding.
    def injector = new PunctInjectImport(tigerXmlAll, txmFile)
    injector.outputFile = new File(tigerXmlAll.getParentFile(), "TigerPnc.xml")
    injector.process()
}