Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / RemoveTag.groovy @ 1000

History | View | Annotate | Download (5.3 kB)

// Copyright © 2010-2013 ENS de Lyon.
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
// Lyon 2, University of Franche-Comté, University of Nice
// Sophia Antipolis, University of Paris 3.
// 
// The TXM platform is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation,
// either version 2 of the License, or (at your option) any
// later version.
// 
// The TXM platform is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the GNU General Public License for more
// details.
// 
// You should have received a copy of the GNU General
// Public License along with the TXM platform. If not, see
// http://www.gnu.org/licenses.
// 
// 
// 
// $LastChangedDate:$
// $LastChangedRevision:$
// $LastChangedBy:$ 
//
28
package org.txm.scripts.importer;
29

    
30
import javax.xml.parsers.*
31
import javax.xml.transform.*
32
import javax.xml.transform.dom.DOMSource
33
import javax.xml.transform.stream.StreamResult
34
import javax.xml.xpath.*
35
import org.txm.importer.PersonalNamespaceContext
36
import org.txm.metadatas.*
37
import org.txm.utils.*
38
import org.txm.utils.xml.*
39
import org.w3c.dom.Document
40
import org.w3c.dom.Element
41

    
42
/**
 * Removes the nodes selected by one or more XPath expressions from an XML file.
 *
 * Two usages are offered:
 * <ul>
 * <li>the constructor reads one file and writes the pruned document to another file;</li>
 * <li>the static {@code xpath(...)} methods prune a file in place.</li>
 * </ul>
 *
 * @author mdecorde
 *
 */
public class RemoveTag {
        /** The file the constructor writes the pruned document to. */
        File outfile;

        /** The DOM being processed by the constructor; reset to null once saved. */
        Document doc;

        /**
         * Parses xmlfile, removes every node matched by xpath and writes the
         * result to outfile.
         *
         * A write failure is not propagated from here: save() logs it and
         * returns false, and that result is ignored, as it always was.
         *
         * @param xmlfile the XML file to read
         * @param outfile the file to write the pruned document to
         * @param xpath the XPath selecting the nodes to remove
         */
        public RemoveTag(File xmlfile, File outfile, String xpath)
        {
                this.outfile = outfile;
                doc = parse(xmlfile);

                // Same evaluation path as the static xpath(...) methods, so the same
                // expressions (including prefixed ones) work in both usages.
                removeMatches(doc, XPathFactory.newInstance(), xpath);

                save()
                doc = null
        }

        /**
         * Serializes the current document to outfile as indented UTF-8 XML.
         *
         * @return true, if successful; false if anything failed (the exception is logged)
         */
        private boolean save()
        {
                Writer writer = null;
                try {
                        // DOM source
                        Source source = new DOMSource(doc);

                        // Output file
                        writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
                        Result resultat = new StreamResult(writer);

                        // Transformer configuration
                        TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl();
                        Transformer transformer = fabrique.newTransformer();
                        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
                        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

                        // Transformation
                        transformer.transform(source, resultat);

                        // Closed here on the success path so that a failing final flush
                        // is still reported as a failed save.
                        writer.close();
                        return true;
                } catch (Exception e) {
                        org.txm.utils.logger.Log.printStackTrace(e);
                        return false;
                } finally {
                        // Releases the file handle when the transformation failed midway;
                        // closing an already closed writer has no effect.
                        if (writer != null) {
                                try {
                                        writer.close();
                                } catch (IOException ignored) {
                                        // the outcome has already been decided above
                                }
                        }
                }
        }

        /**
         * Removes in place, from xmlfile, every node matched by xpath.
         *
         * @param xmlfile the XML file to update; must exist and be readable and writable
         * @param xpath the XPath selecting the nodes to remove
         * @return false if the file is missing or not readable/writable, otherwise
         *         the value returned by DomUtils.save
         */
        public static boolean xpath(File xmlfile, String xpath)
        {
                // Qualified call: a bare xpath(...) would be resolved by Groovy as a
                // call on the local String parameter of the same name.
                return RemoveTag.xpath(xmlfile, [xpath])
        }

        /**
         * Removes in place, from xmlfile, every node matched by any of the xpaths.
         * The expressions are applied in list order against the same document,
         * which is written back once at the end.
         *
         * @param xmlfile the XML file to update; must exist and be readable and writable
         * @param xpaths the XPaths selecting the nodes to remove
         * @return false if the file is missing or not readable/writable, otherwise
         *         the value returned by DomUtils.save
         */
        public static boolean xpath(File xmlfile, List<String> xpaths)
        {
                if (!xmlfile.exists()) {
                        println "Error: $xmlfile does not exists"
                        return false
                }

                if (!(xmlfile.canRead() && xmlfile.canWrite())) {
                        println "Error: $xmlfile is not readable or writable"
                        return false
                }

                Document doc = parse(xmlfile);
                XPathFactory xpathfactory = XPathFactory.newInstance();

                for (String expression : xpaths) {
                        removeMatches(doc, xpathfactory, expression);
                }
                return DomUtils.save(doc, xmlfile)
        }

        /**
         * Parses xmlfile into a DOM with namespace and XInclude awareness enabled.
         *
         * @param xmlfile the XML file to read
         * @return the parsed document
         */
        private static Document parse(File xmlfile)
        {
                DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
                domFactory.setNamespaceAware(true); // never forget this!
                domFactory.setXIncludeAware(true);
                return domFactory.newDocumentBuilder().parse(xmlfile);
        }

        /**
         * Evaluates expression against doc and detaches every selected node.
         *
         * @param doc the document to prune
         * @param xpathfactory the factory used to create the XPath evaluator
         * @param expression the XPath selecting the nodes to remove
         */
        private static void removeMatches(Document doc, XPathFactory xpathfactory, String expression)
        {
                def engine = xpathfactory.newXPath();
                engine.setNamespaceContext(new PersonalNamespaceContext());
                def nodes = engine.compile(expression).evaluate(doc, XPathConstants.NODESET);
                if (nodes == null) {
                        return;
                }

                // Copy the selection first so the removals below cannot disturb
                // the traversal of the result list.
                def selected = []
                for (int i = 0; i < nodes.getLength(); i++) {
                        selected << nodes.item(i)
                }

                for (def node : selected) {
                        detach(node)
                }
        }

        /**
         * Detaches one node from its document. Attributes are removed from their
         * owner element; any other node is removed from its parent. Nodes that
         * are attached to nothing are left alone.
         *
         * @param node the DOM node to detach
         */
        private static void detach(def node)
        {
                if (node instanceof org.w3c.dom.Attr) {
                        // An attribute has no parent node, only an owner element.
                        def owner = node.getOwnerElement();
                        if (owner != null) {
                                owner.removeAttributeNode(node)
                        }
                        return;
                }

                def parent = node.getParentNode();
                if (parent != null) {
                        parent.removeChild(node)
                }
        }

        /**
         * Developer entry point: runs the removal on hard-coded local paths.
         *
         * @param args unused
         */
        public static void main(String[] args) {
                RemoveTag rt = new RemoveTag(
                        new File("/home/mdecorde/TXM/corpora/graal/import.xml"),
                        new File("/home/mdecorde/TXM/corpora/graal/import-o.xml"),
                        "//edition[@name='courante']"
                        )
        }
}