Statistics
| Revision:

root / tmp / org.txm.tigersearch.rcp / groovy / org / txm / scripts / importer / srcmf / SrcmfImporter.groovy @ 2080

History | View | Annotate | Download (2.5 kB)

1
/*
 * Calls all the SRCMF import scripts in sequence.
 */
4

    
5
/**
6
 *
7
 * @author tmr
8
 */
9

    
10
// Import statements to enable when this script is run from the command line:
// import MasterReheader
// import PunctInjectImport
// import SubcorpusDataInject
// import TigerSubcorpus2Main
// import java.util.logging.FileHandler
// import javax.xml.parsers.DocumentBuilderFactory
17

    
18
// TXM package statement
19
package org.txm.scripts.importer.srcmf
20

    
21
import java.util.logging.*;
22

    
23
import org.txm.utils.io.IOUtils
24

    
25

    
26
// Command line entry point.
// Expects exactly three positional arguments: the TIGER master file, the
// XML-TXM file and the header file. Builds the merged TIGER file with
// doAllButPnc(), runs doPnc() on it, then deletes the merged intermediate file.
def cli = new CliBuilder(
    usage:'SrcmfImporter.groovy [options] tiger_master.xml xml_txm.xml header_file.xml'
)
cli.h(longOpt:'help', 'Prints this message.')
def options = cli.parse(args)
if (options == null) {
    // CliBuilder.parse() returns null (after reporting the problem itself)
    // when the command line cannot be parsed; bail out instead of hitting a
    // NullPointerException on options.arguments().
    System.exit(2)
}
if (options.h) {
    // Honour the declared -h/--help option.
    cli.usage()
    System.exit(0)
}
if (options.arguments().size() != 3) {
    println 'Incorrect number of command line arguments... exiting'
    // usage() prints the message itself and returns nothing, so it must not
    // be wrapped in println (that would print an extra "null" line).
    cli.usage()
    System.exit(2)
}

def tigerFile = new File(options.arguments()[0])
def txmFile = new File(options.arguments()[1])
def headerFile = new File(options.arguments()[2])
// The working ("bin") directory is the grandparent directory of the XML-TXM file.
def tigerXmlAll = doAllButPnc(
    tigerFile,
    txmFile,
    headerFile,
    txmFile.getAbsoluteFile().getParentFile().getParentFile()
)
doPnc(tigerXmlAll, txmFile)
// The merged file is only an intermediate product once doPnc() has run.
tigerXmlAll.delete()
49

    
50
/**
 * Runs every import step except the one performed by doPnc().
 *
 * Steps, in order:
 *  1. SubcorpusDataInject.processMaster() writes "master_pos.xml" into
 *     <binDir>/tiger from the TIGER master file.
 *  2. MasterReheader.script() writes a reheadered copy to a temporary file,
 *     which then replaces "master_pos.xml".
 *  3. TigerSubcorpus2Main.script() produces "TigerAll.xml" next to it.
 *
 * @param tigerFile  TIGER master file passed to SubcorpusDataInject.processMaster()
 * @param txmFile    file whose parent directory is scanned for the XML-TXM source
 * @param headerFile header file passed to MasterReheader.script()
 * @param binDir     directory receiving the "tiger" sub-directory and "tiger.log"
 * @return the "TigerAll.xml" file inside <binDir>/tiger
 * @throws IllegalStateException if the directory of txmFile cannot be listed or
 *         holds no file accepted by IOUtils.HIDDENFILE_FILTER
 * @throws IOException if the reheadered file cannot replace "master_pos.xml"
 */
def doAllButPnc(File tigerFile, File txmFile, File headerFile, File binDir) {
    // Run pos injection script
    File txmSrcDir = txmFile.getAbsoluteFile().getParentFile()
    File tigerDir = new File(binDir, "tiger")
    tigerDir.mkdir()
    File masterpos = new File(tigerDir, "master_pos.xml")

    // listFiles() returns null when the directory cannot be read and an empty
    // array when nothing matches; fail with a clear message in both cases
    // instead of an opaque NullPointerException / index error.
    // NOTE(review): the first listed file is used, but listFiles() gives no
    // ordering guarantee - confirm the directory holds a single candidate.
    File[] candidates = txmSrcDir.listFiles(IOUtils.HIDDENFILE_FILTER)
    if (candidates == null || candidates.length == 0) {
        throw new IllegalStateException(
            "No file accepted by IOUtils.HIDDENFILE_FILTER found in " + txmSrcDir)
    }
    File xmltxm = candidates[0]

    File logFile = new File(binDir, "tiger.log")
    def logHandler = new FileHandler(logFile.getAbsolutePath())
    try {
        def sdi = new SubcorpusDataInject(xmltxm, logHandler, "vers")
        sdi.processMaster(tigerFile, masterpos)

        // Run reheader script
        def reheader = new MasterReheader()
        File tmp = File.createTempFile("tmp", ".xml", tigerDir)
        def feats = ['nt':['cat', 'type', 'coord'], 't':['pos', 'form', 'q']]
        def firstFeat = ['nt':'cat', 't':'word']
        reheader.script(masterpos, headerFile, tmp, feats, firstFeat)
        if (!tmp.exists()) {
            println "Error: reheader failed"
        }
        // Replace master_pos.xml with the reheadered file in one step. Unlike
        // delete() + renameTo(), this keeps the old file until the move
        // succeeds and raises an IOException when it does not.
        java.nio.file.Files.move(
            tmp.toPath(),
            masterpos.toPath(),
            java.nio.file.StandardCopyOption.REPLACE_EXISTING
        )

        // Run merge master & subcorpus script
        def tigerXmlAll = new File(masterpos.getParentFile(), "TigerAll.xml")
        def mergescript = new TigerSubcorpus2Main()
        mergescript.script(masterpos, tigerXmlAll)
        return tigerXmlAll
    } finally {
        // Release the log file and its lock file.
        // NOTE(review): assumes nothing outside this method still logs through
        // this handler - confirm against SubcorpusDataInject.
        logHandler.close()
    }
}
80

    
81
/**
 * Runs PunctInjectImport on the given files, with its output file set to
 * "TigerPnc.xml" in the same directory as tigerXmlAll.
 *
 * @param tigerXmlAll first constructor argument of PunctInjectImport; its
 *                    parent directory receives "TigerPnc.xml"
 * @param txmFile     second constructor argument of PunctInjectImport
 * @return whatever PunctInjectImport.process() returns
 */
def doPnc(File tigerXmlAll, File txmFile) {
    // Declared with 'def' so the injector stays local to this call instead of
    // being stored in the script binding and shared between invocations.
    def injector = new PunctInjectImport(tigerXmlAll, txmFile)
    injector.outputFile = new File(tigerXmlAll.getParentFile(), "TigerPnc.xml")
    injector.process()
}