Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / limsi / importer.groovy @ 1000

History | View | Annotate | Download (5.7 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$
27
//
28

    
29
package org.txm.scripts.importer.limsi
30

    
31
import java.util.ArrayList;
32
import org.txm.utils.Pair;
33

    
34
import javax.xml.transform.*;
35
import javax.xml.transform.dom.DOMSource;
36
import javax.xml.transform.stream.StreamResult;
37

    
38
import org.txm.utils.logger.Log;
39
import org.txm.importer.scripts.xmltxm.*;
40
import org.txm.stat.utils.ConsoleProgressBar
41

    
42
import java.io.BufferedWriter;
43
import java.io.File;
44
import java.io.FileOutputStream;
45
import java.io.IOException;
46
import java.io.OutputStreamWriter;
47
import java.io.Writer;
48
import org.w3c.dom.*;
49
import org.xml.sax.SAXException;
50
import javax.xml.parsers.*;
51
import javax.xml.xpath.*;
52

    
53
import java.util.HashMap;
54
import org.txm.scripts.importer.*;
55
import org.txm.utils.*;
56
import org.txm.metadatas.*;
57

    
58
// TODO: Auto-generated Javadoc
59
/**
 * Importer for LIMSI transcription files.
 *
 * Two entry points are offered:
 * <ul>
 * <li>{@link #run()} hands every input file to {@code LimsiToCQP} so that a
 * ".cqp" file is produced in the TXM working directory;</li>
 * <li>{@link #process(File, File, ArrayList)} parses one XML file, sets
 * attributes on its "Trans" element(s) and writes the result to another file.</li>
 * </ul>
 */
class importer {

    /** XPath expressions selecting Speaker elements, keyed by speaker id (not referenced by the methods of this class). */
    HashMap<String, String> speakers = ["spk1":"//Speaker[@id='spk1']", "spk2":"//Speaker[@id='spk2']"];

    /** XPath expressions selecting Topic elements, keyed by topic id (not referenced by the methods of this class). */
    HashMap<String, String> topics = ["to1":"//Topic[@id='to1']", "to2":"//Topic[@id='to2']"];

    /** XPath expression selecting the Trans element(s); used by {@link #process(File, File, ArrayList)}. */
    HashMap<String, String> trans = ["trans":"//Trans"];

    /** The DOM document currently being edited; set by {@link #process(File, File, ArrayList)}. */
    def doc;

    /** The file last given to {@link #process(File, File, ArrayList)} as input. */
    File infile;

    /** The file last given to {@link #process(File, File, ArrayList)} as output. */
    File outfile;

    /** The directory receiving the ".cqp" files written by {@link #run()}. */
    File txmDir;

    /** The binary directory given to the constructor (stored only; not read by the methods of this class). */
    File binDir;

    /** The input files to convert; may be null, in which case {@link #run()} does nothing. */
    ArrayList<File> trsfiles;

    /** The metadata (never assigned nor read by the methods of this class). */
    Metadatas metadatas;

    /**
     * Instantiates a new importer.
     *
     * @param trsfiles the input files to convert, may be null
     * @param binDir the binary directory (only stored)
     * @param txmDir the directory in which {@link #run()} writes its ".cqp" files
     */
    public importer(ArrayList<File> trsfiles, File binDir, File txmDir)
    {
        this.trsfiles = trsfiles;
        this.txmDir = txmDir;
        this.binDir = binDir;
    }

    /**
     * Converts every input file to "&lt;txmDir&gt;/&lt;name without extension&gt;.cqp".
     * A file is skipped when its ".cqp" counterpart already exists and is at
     * least as recent as the input file.
     *
     * @return false if there is no file list or if txmDir could not be created;
     * otherwise true if txmDir can be listed once the conversion is over
     */
    public boolean run()
    {
        if (trsfiles == null) {
            println "no files to process"
            return false;
        }

        txmDir.mkdir();
        if (!txmDir.exists()) {
            println "can't create txmDir: "+txmDir.getAbsolutePath()
            // Without the output directory no result can be written: stop here
            // instead of running every conversion against a missing directory.
            return false;
        }

        println "Convert from LIMSI files to CQP files ("+trsfiles.size()+")."
        ConsoleProgressBar cpb = new ConsoleProgressBar(trsfiles.size());
        for (File source : trsfiles) {
            cpb.tick();

            // Strip the last extension; a leading dot (index 0) is not an extension.
            String basename = source.getName();
            int dot = basename.lastIndexOf(".");
            if (dot > 0) basename = basename.substring(0, dot)

            File target = new File(txmDir, basename+".cqp")

            // Only (re)build the result when it is missing or older than its source.
            boolean upToDate = target.exists() && target.lastModified() >= source.lastModified();
            if (!upToDate) {
                def processor = new LimsiToCQP(source);
                processor.process(target);
            }
        }

        println ""
        return txmDir.listFiles() != null;
    }

    /**
     * Parses infile, sets one attribute per pair on every element matched by
     * the "trans" XPath expression, then writes the document to outfile.
     * Exceptions raised while parsing infile are not caught here.
     *
     * @param infile the XML file to read
     * @param outfile the XML file to write
     * @param metas the (attribute name, attribute value) pairs to set
     * @return true, if the document was written successfully
     */
    public boolean process(File infile, File outfile, ArrayList<Pair<String, String>> metas)
    {
        // save() reads these fields, so they must be set before it is called.
        this.infile = infile;
        this.outfile = outfile;

        def factory = DocumentBuilderFactory.newInstance()
        factory.setXIncludeAware(true);
        def builder = factory.newDocumentBuilder()

        doc = builder.parse(infile)
        insert(trans.get("trans"), metas);
        return save();
    }

    /**
     * Sets, on every node of the current document matched by xpath, one
     * attribute per pair (first = attribute name, second = attribute value).
     * Matched nodes are cast to Element.
     *
     * @param xpath the XPath expression selecting the elements to update
     * @param pairs the (attribute name, attribute value) pairs to set
     */
    public void insert(String xpath, List<Pair<String, String>> pairs)
    {
        println ("insert $pairs into $xpath")
        def expr = XPathFactory.newInstance().newXPath().compile(xpath)
        def nodes = expr.evaluate(doc, XPathConstants.NODESET)

        for (Node node : nodes) {
            Element elem = (Element)node;
            for (Pair<String, String> p : pairs) {
                elem.setAttribute(p.getFirst(), p.getSecond());
            }
        }
    }

    /**
     * Serializes the current document to outfile as indented UTF-8 XML.
     * Any exception is logged and reported through the return value.
     *
     * @return true, if successful
     */
    private boolean save()
    {
        try {
            // DOM source
            Source source = new DOMSource(doc);

            // Output file
            Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
            try {
                Result result = new StreamResult(writer);

                // Transformer configuration
                TransformerFactory transformerFactory = new net.sf.saxon.TransformerFactoryImpl();
                Transformer transformer = transformerFactory.newTransformer();
                transformer.setOutputProperty(OutputKeys.METHOD, "xml");
                transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

                // Transformation
                transformer.transform(source, result);
            } finally {
                // Release the file handle even when the transformation fails.
                writer.close();
            }
            return true;
        } catch (Exception e) {
            org.txm.utils.logger.Log.printStackTrace(e);
            return false;
        }
    }

    /**
     * Manual smoke test: rewrites "xml/transcriber/dialogue.trs" from the user
     * home directory to "xml/transcriber/dialogue-out.trs" through
     * {@link #process(File, File, ArrayList)}.
     *
     * @param args the arguments (ignored)
     */
    public static void main(String[] args)
    {
        String userhome = System.getProperty("user.home");
        // NOTE(review): this metadata file is not loaded yet, so no attribute is
        // injected below - wire it in once the intended Metadatas usage is confirmed.
        File metadatasfile = new File(userhome,"/xml/transcriber/metadatas.xml");
        File infile = new File(userhome,"/xml/transcriber/dialogue.trs");
        File outfile = new File(userhome,"/xml/transcriber/dialogue-out.trs");

        ArrayList<File> files = new ArrayList<File>();
        files.add(infile);
        File workDir = infile.getParentFile();

        // The class only declares importer(trsfiles, binDir, txmDir) and a
        // no-argument run(); the directories are not read by process().
        importer imp = new importer(files, workDir, workDir);
        imp.process(infile, outfile, new ArrayList<Pair<String, String>>());
    }
}