Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / transcriber / RemoveSpeaker.groovy @ 479

History | View | Annotate | Download (3.8 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$ 
27
//
28
package org.txm.importer.transcriber;
29

    
30

    
31
import org.txm.importer.*;
32
import org.txm.importer.graal.PersonalNamespaceContext
33
import org.txm.utils.*;
34
import org.txm.metadatas.*;
35

    
36
import java.io.File;
37

    
38
import org.w3c.dom.Document;
39
import org.w3c.dom.Element;
40

    
41
import javax.xml.parsers.*;
42
import javax.xml.xpath.*;
43
import javax.xml.transform.*;
44
import javax.xml.transform.dom.DOMSource;
45
import javax.xml.transform.stream.StreamResult;
46

    
47

    
48

    
49
// TODO: Auto-generated Javadoc
50
/**
 * Removes the "u" elements of a transcription file whose "spk" attribute
 * equals a given speaker id, then writes the resulting document to an
 * output file.
 *
 * All the work is done by the constructor.
 *
 * @author mdecorde
 */
public class RemoveSpeaker {
        /** The file the filtered document is written to. */
        File outfile;

        /** The DOM document parsed from the transcription file. */
        Document doc;

        /**
         * Parses the transcription file, removes every "tei:u" element whose
         * "spk" attribute equals the given id and saves the result in outfile.
         *
         * Side effect: sets the "javax.xml.transform.TransformerFactory" system
         * property to the Saxon implementation.
         *
         * Parsing and XPath errors are propagated to the caller. A failure while
         * writing the output file is only logged by save(); its boolean result
         * is not reported by this constructor.
         *
         * @param transcriptionfile the XML file to read
         * @param outfile the file to write the filtered document to
         * @param id the value of the "spk" attribute of the elements to remove
         */
        public RemoveSpeaker(File transcriptionfile, File outfile, String id)
        {
                System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl");

                this.outfile = outfile;

                // The id is embedded as a properly quoted XPath literal so that an
                // id containing quote characters cannot break or alter the query.
                String xpathString = "//tei:u[@spk=" + toXPathLiteral(id) + "]";

                DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
                domFactory.setNamespaceAware(true); // required: the XPath uses the "tei" prefix
                domFactory.setXIncludeAware(true);
                DocumentBuilder builder = domFactory.newDocumentBuilder();
                doc = builder.parse(transcriptionfile);

                def xpath = XPathFactory.newInstance().newXPath()
                xpath.setNamespaceContext(new PersonalNamespaceContext());
                def expr = xpath.compile(xpathString);
                def nodes = expr.evaluate(doc, XPathConstants.NODESET);

                // Detach every matching element from its parent.
                for (def node : nodes) {
                        node.getParentNode().removeChild(node);
                }
                save()
        }

        /**
         * Builds an XPath 1.0 string literal for the given value.
         *
         * XPath 1.0 has no escape sequence inside string literals, so the value
         * is wrapped in whichever quote character it does not contain; when it
         * contains both, a concat(...) call joining the apostrophe-free pieces
         * is produced instead.
         *
         * @param value the raw string; null is rendered as the text "null",
         *              as the plain string concatenation previously did
         * @return an XPath expression evaluating to the given string
         */
        private static String toXPathLiteral(String value)
        {
                String text = (value == null) ? "null" : value;
                if (!text.contains("'")) {
                        return "'" + text + "'";
                }
                if (!text.contains('"')) {
                        return '"' + text + '"';
                }

                // Both quote kinds are present: split on apostrophes and glue the
                // pieces back together with a double-quoted apostrophe literal.
                String[] parts = text.split("'", -1);
                StringBuilder literal = new StringBuilder("concat(");
                for (int i = 0; i < parts.length; i++) {
                        if (i > 0) {
                                literal.append(", \"'\", ");
                        }
                        literal.append("'").append(parts[i]).append("'");
                }
                literal.append(")");
                return literal.toString();
        }

        /**
         * Serializes the document to outfile as indented UTF-8 XML using the
         * Saxon transformer.
         *
         * Any exception is logged and reported through the return value. The
         * output writer is closed in every case.
         *
         * @return true, if successful
         */
        private boolean save()
        {
                Writer writer = null;
                try {
                        // DOM source
                        Source source = new DOMSource(doc);

                        // Output file
                        writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
                        Result resultat = new StreamResult(writer);

                        // Transformer configuration
                        TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl();
                        Transformer transformer = fabrique.newTransformer();
                        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
                        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

                        // Transformation
                        transformer.transform(source, resultat);

                        // Closing here flushes the buffer; a failure must make save() return false.
                        writer.close();
                        writer = null;
                        return true;
                } catch (Exception e) {
                        org.txm.utils.logger.Log.printStackTrace(e);
                        return false;
                } finally {
                        // Only reached with a non-null writer when something failed above:
                        // release the file handle, the original error is already logged.
                        if (writer != null) {
                                try {
                                        writer.close();
                                } catch (IOException ignored) {
                                        // nothing more to do, the failure was already reported
                                }
                        }
                }
        }
}