Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / export / ts / MatchInject.groovy @ 187

History | View | Annotate | Download (3.2 kB)

1
#! /usr/bin/groovy
2
package org.txm.export.ts;
3

    
4
import org.xml.sax.XMLReader;
5
import org.xml.sax.helpers.XMLReaderFactory;
6
import groovy.util.slurpersupport.NodeChild
7

    
8
/*
 * The script takes the <matches/> elements from the match file
 * (MatchInputName, e.g. "Tiger_match.xml")
 * and inserts them at the end of the <s/> element bearing the same ID.
 * Inputs: three file names
 * - TsInputName --- the TS file WITHOUT matches.
 * - MatchInputName --- the TS file containing only matches.
 * - OutputFileName --- the required output file.
 * To pass these arguments from within an application, call script() directly.
 */
17

    
18
// Filename variables
19
// def TsInputName = '/home/tomr/Documents/Work/lyon12/srcmf/groovy/MatchInject/ts_input.xml'
20
// def MatchInputName = '/home/tomr/Documents/Work/lyon12/srcmf/groovy/MatchInject/match_input.xml'
21
// def OutputFileName = '/home/tomr/Documents/Work/lyon12/srcmf/groovy/MatchInject/test.xml'
22

    
23
// Main code: when run from the command line, exactly three arguments are
// expected (TS input file, match input file, output file). Anything else
// prints the usage message instead of running the merge.
if (args && args.size() == 3) {
    script(args[0], args[1], args[2])
} else {
    println '''Incorrect number of arguments: three strings required.

USAGE:
******
groovy MatchInject.groovy TsInput.xml MatchInput.xml OutputFile.xml'''
}
32

    
33
/**
 * Convenience entry point taking file names instead of File objects.
 * Wraps each name in a File and delegates to script(File, File, File).
 *
 * @param tsInputName    path of the TS file WITHOUT matches
 * @param matchInputName path of the TS file containing only matches
 * @param outputFileName path of the output file to write
 */
def script(String tsInputName, String matchInputName, String outputFileName) {
    // Each name gets its own File; the three Files are passed as three arguments.
    script(new File(tsInputName), new File(matchInputName), new File(outputFileName))
}
36

    
37
/**
 * The script: merges the <matches/> of a match file into the sentences of a TS file.
 *
 * Every <s/> element found in tsInputFile is written to the output as
 * <s id="..."> holding its <graph/> child(ren), followed by the <matches/>
 * child(ren) of the first <s/> element of matchInputFile that carries the
 * same id (when there is one). The sentences are wrapped in
 * <corpus id="TSOut"><body>...</body></corpus>.
 *
 * @param tsInputFile    the TS file WITHOUT matches
 * @param matchInputFile the TS file containing only matches
 * @param outputFile     the file the merged document is written to (UTF-8)
 */
def script(File tsInputFile, File matchInputFile, File outputFile) {
    def tsDocument = new XmlSlurper().parse(tsInputFile)

    // The match file is parsed with a reader whose namespace feature is turned off.
    // NOTE(review): this sets the "org.xml.sax.driver" system property for the
    // whole JVM and never restores it -- confirm no other code depends on it.
    System.setProperty("org.xml.sax.driver", "com.sun.org.apache.xerces.internal.parsers.SAXParser")
    def xmlReader = XMLReaderFactory.createXMLReader()
    xmlReader.setFeature('http://xml.org/sax/features/namespaces', false)
    def matchDocument = new XmlSlurper(xmlReader).parse(matchInputFile)

    def inputSentences = tsDocument.'**'.findAll { it.name() == 's' }
    def matchSentences = matchDocument.'**'.findAll { it.name() == 's' }

    // Index the match sentences by id once instead of scanning the whole list
    // for every input sentence. Only the first sentence seen for a given id is
    // kept, which is what a linear find() would have returned.
    def matchesById = [:]
    matchSentences.each { sentence ->
        def sentenceId = sentence.'@id'.toString()
        if (!matchesById.containsKey(sentenceId)) {
            matchesById.put(sentenceId, sentence)
        }
    }

    def markup = {
        mkp.xmlDeclaration()
        corpus(id: 'TSOut') {
            body {
                inputSentences.each { sPath ->
                    s(id: "${sPath.'@id'}") {
                        mkp.yield(sPath.graph)
                        def sMatches = matchesById.get(sPath.'@id'.toString())
                        if (sMatches) {
                            mkp.yield(sMatches.matches)
                        }
                    }
                }
            }
        }
    }
    def processor = new groovy.xml.StreamingMarkupBuilder().bind(markup)
    // The serialized document declares UTF-8, so the bytes must be UTF-8 too
    // rather than whatever the platform default charset happens to be.
    outputFile.withWriter('UTF-8') { it << groovy.xml.XmlUtil.serialize(processor) }
}