Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / RemoveTag.groovy @ 2473

History | View | Annotate | Download (5.3 kB)

1 881 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 881 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 881 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 881 mdecorde
// Sophia Antipolis, University of Paris 3.
5 881 mdecorde
//
6 881 mdecorde
// The TXM platform is free software: you can redistribute it
7 881 mdecorde
// and/or modify it under the terms of the GNU General Public
8 881 mdecorde
// License as published by the Free Software Foundation,
9 881 mdecorde
// either version 2 of the License, or (at your option) any
10 881 mdecorde
// later version.
11 881 mdecorde
//
12 881 mdecorde
// The TXM platform is distributed in the hope that it will be
13 881 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 881 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 881 mdecorde
// PURPOSE. See the GNU General Public License for more
16 881 mdecorde
// details.
17 881 mdecorde
//
18 881 mdecorde
// You should have received a copy of the GNU General
19 881 mdecorde
// Public License along with the TXM platform. If not, see
20 881 mdecorde
// http://www.gnu.org/licenses.
21 881 mdecorde
//
22 881 mdecorde
//
23 881 mdecorde
//
24 881 mdecorde
// $LastChangedDate:$
25 881 mdecorde
// $LastChangedRevision:$
26 881 mdecorde
// $LastChangedBy:$
27 881 mdecorde
//
28 1000 mdecorde
package org.txm.scripts.importer;
29 881 mdecorde
30 881 mdecorde
import javax.xml.parsers.*
31 881 mdecorde
import javax.xml.transform.*
32 881 mdecorde
import javax.xml.transform.dom.DOMSource
33 881 mdecorde
import javax.xml.transform.stream.StreamResult
34 881 mdecorde
import javax.xml.xpath.*
35 1000 mdecorde
import org.txm.importer.PersonalNamespaceContext
36 881 mdecorde
import org.txm.metadatas.*
37 881 mdecorde
import org.txm.utils.*
38 1000 mdecorde
import org.txm.utils.xml.*
39 881 mdecorde
import org.w3c.dom.Document
40 881 mdecorde
import org.w3c.dom.Element
41 881 mdecorde
42 881 mdecorde
/**
 * Removes from an XML file the nodes selected by one or more XPath expressions.
 *
 * Two usages are offered: the constructor reads one file and writes the
 * filtered document to another file, while the static {@code xpath} methods
 * rewrite the given file in place.
 *
 * @author mdecorde
 */
public class RemoveTag {
        /** The file the constructor writes the filtered document to. */
        File outfile;

        /** The document being filtered; reset to null once the constructor has saved it. */
        Document doc;

        /**
         * Parses {@code xmlfile}, removes every node selected by {@code xpath}
         * and writes the result to {@code outfile}.
         *
         * No namespace context is installed on the XPath evaluator used here
         * (unlike the static {@code xpath} methods). The boolean result of the
         * save step is not reported to the caller; a failure is only logged.
         * Parsing and XPath errors are propagated as exceptions.
         *
         * @param xmlfile the XML file to read
         * @param outfile the file to write the filtered document to
         * @param xpath the XPath expression selecting the nodes to remove
         */
        public RemoveTag(File xmlfile, File outfile, String xpath)
        {
                this.outfile = outfile;
                doc = parseXml(xmlfile);

                removeMatches(doc, XPathFactory.newInstance().newXPath(), xpath);

                save()
                doc = null
        }

        /**
         * Builds a DOM document from a file with a namespace-aware,
         * XInclude-aware parser.
         *
         * @param xmlfile the XML file to parse
         * @return the parsed document
         */
        private static Document parseXml(File xmlfile)
        {
                DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
                domFactory.setNamespaceAware(true); // never forget this!
                domFactory.setXIncludeAware(true);
                return domFactory.newDocumentBuilder().parse(xmlfile);
        }

        /**
         * Evaluates an XPath expression against a document and detaches every
         * selected node from the tree.
         *
         * @param document the document to evaluate the expression on and to modify
         * @param evaluator the XPath evaluator, already configured by the caller
         * @param expression the XPath expression selecting the nodes to remove
         */
        private static void removeMatches(Document document, XPath evaluator, String expression)
        {
                def found = evaluator.compile(expression).evaluate(document, XPathConstants.NODESET);
                if (found == null) {
                        return;
                }

                // Copy the matches before touching the tree so that removals can
                // never disturb the iteration over the result.
                List snapshot = new ArrayList();
                for (def item : found) {
                        snapshot.add(item);
                }

                for (def match : snapshot) {
                        if (match instanceof org.w3c.dom.Attr) {
                                // Attributes have no parent node: they are detached through their owner element.
                                org.w3c.dom.Attr attribute = (org.w3c.dom.Attr) match;
                                Element owner = attribute.getOwnerElement();
                                if (owner != null) {
                                        owner.removeAttributeNode(attribute);
                                }
                                continue;
                        }

                        // The document node (and already detached nodes) have no parent: nothing to remove.
                        def parent = match.getParentNode();
                        if (parent != null) {
                                parent.removeChild(match);
                        }
                }
        }

        /**
         * Serializes {@code doc} to {@code outfile} as indented UTF-8 XML.
         * Any exception is logged and reported through the return value.
         *
         * @return true, if successful
         */
        private boolean save()
        {
                try {
                        // DOM source to serialize
                        Source source = new DOMSource(doc);

                        // Transformer configuration, done before the output file is opened
                        // so that a configuration failure does not truncate the file.
                        TransformerFactory factory = new net.sf.saxon.TransformerFactoryImpl();
                        Transformer transformer = factory.newTransformer();
                        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
                        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

                        // Output file
                        Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
                        try {
                                // Transformation
                                transformer.transform(source, new StreamResult(writer));
                        } finally {
                                // Always release the file handle, even when the transformation fails.
                                writer.close();
                        }
                        return true;
                } catch (Exception e) {
                        org.txm.utils.logger.Log.printStackTrace(e);
                        return false;
                }
        }

        /**
         * Removes, in place, the nodes of {@code xmlfile} selected by a single
         * XPath expression.
         *
         * @param xmlfile the XML file to rewrite
         * @param xpath the XPath expression selecting the nodes to remove
         * @return false if the file is missing, not readable or not writable;
         *         otherwise the result of saving the document back to {@code xmlfile}
         */
        public static boolean xpath(File xmlfile, String xpath)
        {
                // Qualified call: the parameter named 'xpath' would otherwise shadow the method name.
                return RemoveTag.xpath(xmlfile, Collections.singletonList(xpath));
        }

        /**
         * Removes, in place, the nodes of {@code xmlfile} selected by each of the
         * given XPath expressions, applied in order to the same document.
         *
         * @param xmlfile the XML file to rewrite
         * @param xpaths the XPath expressions selecting the nodes to remove
         * @return false if the file is missing, not readable or not writable;
         *         otherwise the result of saving the document back to {@code xmlfile}
         */
        public static boolean xpath(File xmlfile, List<String> xpaths)
        {
                if (!xmlfile.exists()) {
                        println "Error: $xmlfile does not exists"
                        return false
                }

                if (!(xmlfile.canRead() && xmlfile.canWrite())) {
                        println "Error: $xmlfile is not readable or writable"
                        return false
                }

                Document document = parseXml(xmlfile);
                XPathFactory xpathfactory = XPathFactory.newInstance();

                for (String expression : xpaths) {
                        def evaluator = xpathfactory.newXPath();
                        evaluator.setNamespaceContext(new PersonalNamespaceContext());
                        removeMatches(document, evaluator, expression);
                }
                return DomUtils.save(document, xmlfile)
        }

        /**
         * Command-line entry point.
         *
         * When exactly three arguments are given they are used as input file,
         * output file and XPath expression; otherwise the historical hard-coded
         * sample values are used.
         *
         * @param args optional: input file, output file, XPath expression
         */
        public static void main(String[] args) {
                if (args != null && args.length == 3) {
                        new RemoveTag(new File(args[0]), new File(args[1]), args[2])
                        return
                }

                new RemoveTag(
                        new File("/home/mdecorde/TXM/corpora/graal/import.xml"),
                        new File("/home/mdecorde/TXM/corpora/graal/import-o.xml"),
                        "//edition[@name='courante']"
                        )
        }
}