root / tmp / org.txm.core / src / java / org / txm / scripts / importer / RemoveTag.groovy @ 2473
History | View | Annotate | Download (5.3 kB)
// Copyright © 2010-2013 ENS de Lyon.
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
// Lyon 2, University of Franche-Comté, University of Nice
// Sophia Antipolis, University of Paris 3.
//
// The TXM platform is free software: you can redistribute it
// and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation,
// either version 2 of the License, or (at your option) any
// later version.
//
// The TXM platform is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the GNU General Public License for more
// details.
//
// You should have received a copy of the GNU General
// Public License along with the TXM platform. If not, see
// http://www.gnu.org/licenses.
//
//
//
// $LastChangedDate:$
// $LastChangedRevision:$
// $LastChangedBy:$
//
28 |
package org.txm.scripts.importer;
|
29 |
|
30 |
import javax.xml.parsers.* |
31 |
import javax.xml.transform.* |
32 |
import javax.xml.transform.dom.DOMSource |
33 |
import javax.xml.transform.stream.StreamResult |
34 |
import javax.xml.xpath.* |
35 |
import org.txm.importer.PersonalNamespaceContext |
36 |
import org.txm.metadatas.* |
37 |
import org.txm.utils.* |
38 |
import org.txm.utils.xml.* |
39 |
import org.w3c.dom.Document |
40 |
import org.w3c.dom.Element |
41 |
|
42 |
/**
 * Removes from an XML file the nodes selected by one or more XPath expressions.
 *
 * Two usages are offered:
 * <ul>
 * <li>the constructor reads one file, removes the matches of a single
 * expression and writes the result to a separate output file;</li>
 * <li>the static {@code xpath(...)} methods edit a file in place.</li>
 * </ul>
 *
 * @author mdecorde
 */
public class RemoveTag {

	/** The file the constructor writes the filtered document to. */
	File outfile;

	/** The DOM document being processed; reset to null once the constructor has saved it. */
	Document doc;

	/**
	 * Parses {@code xmlfile}, removes every node matched by {@code xpath} and
	 * writes the resulting document to {@code outfile}.
	 *
	 * A failure while writing is logged by {@link #save()} and is not reported
	 * to the caller; parsing and XPath errors propagate as exceptions.
	 *
	 * @param xmlfile the XML file to read
	 * @param outfile the file receiving the filtered document
	 * @param xpath the XPath expression selecting the nodes to remove
	 */
	public RemoveTag(File xmlfile, File outfile, String xpath)
	{
		this.outfile = outfile;
		doc = parseDocument(xmlfile);

		// Same namespace-aware XPath setup as the static xpath(...) methods,
		// so prefixed expressions behave identically in both entry points.
		removeMatchingNodes(doc, newXPath(), xpath);

		save()
		doc = null
	}

	/**
	 * Serializes {@link #doc} to {@link #outfile} as indented UTF-8 XML,
	 * using the Saxon transformer factory.
	 *
	 * @return true if the document was written, false if any exception
	 *         occurred (the exception is logged, not rethrown)
	 */
	private boolean save()
	{
		try {
			// Configure the transformer before opening the output file, so a
			// configuration failure does not leave a truncated file behind.
			TransformerFactory factory = new net.sf.saxon.TransformerFactoryImpl();
			Transformer transformer = factory.newTransformer();
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

			// Output file, explicitly encoded in UTF-8
			Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
			try {
				// Identity transformation: DOM source -> output stream
				transformer.transform(new DOMSource(doc), new StreamResult(writer));
			} finally {
				// Always release the file handle, even when the transformation fails
				writer.close();
			}
			return true;
		} catch (Exception e) {
			org.txm.utils.logger.Log.printStackTrace(e);
			return false;
		}
	}

	/**
	 * Removes, in place, every node of {@code xmlfile} matched by {@code xpath}.
	 *
	 * @param xmlfile the XML file to edit; it is overwritten
	 * @param xpath the XPath expression selecting the nodes to remove
	 * @return false if the file does not exist or is not both readable and
	 *         writable; otherwise the value returned by {@code DomUtils.save}
	 */
	public static boolean xpath(File xmlfile, String xpath)
	{
		// Qualified call: a bare xpath(...) would try to invoke the String parameter.
		return RemoveTag.xpath(xmlfile, [xpath])
	}

	/**
	 * Removes, in place, every node of {@code xmlfile} matched by any of the
	 * given XPath expressions. Expressions are applied in list order to the
	 * same document, which is saved once at the end.
	 *
	 * @param xmlfile the XML file to edit; it is overwritten
	 * @param xpaths the XPath expressions selecting the nodes to remove
	 * @return false if the file does not exist or is not both readable and
	 *         writable; otherwise the value returned by {@code DomUtils.save}
	 */
	public static boolean xpath(File xmlfile, List<String> xpaths)
	{
		if (!xmlfile.exists()) {
			println "Error: $xmlfile does not exists"
			return false
		}

		if (!(xmlfile.canRead() && xmlfile.canWrite())) {
			println "Error: $xmlfile is not readable or writable"
			return false
		}

		Document document = parseDocument(xmlfile)
		XPath engine = newXPath()

		for (String expression : xpaths) {
			removeMatchingNodes(document, engine, expression)
		}
		return DomUtils.save(document, xmlfile)
	}

	/**
	 * Parses an XML file into a DOM document with namespace and XInclude
	 * awareness switched on.
	 *
	 * @param xmlfile the file to parse
	 * @return the parsed document
	 */
	private static Document parseDocument(File xmlfile)
	{
		DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
		domFactory.setNamespaceAware(true); // required for namespace-qualified XPath steps
		domFactory.setXIncludeAware(true);
		return domFactory.newDocumentBuilder().parse(xmlfile);
	}

	/**
	 * Creates an XPath evaluator bound to the project's
	 * {@code PersonalNamespaceContext}.
	 *
	 * @return a new XPath evaluator
	 */
	private static XPath newXPath()
	{
		XPath engine = XPathFactory.newInstance().newXPath();
		engine.setNamespaceContext(new PersonalNamespaceContext());
		return engine;
	}

	/**
	 * Evaluates {@code expression} against {@code document} and detaches every
	 * matched node: attributes are removed from their owner element, any other
	 * node from its parent. Nodes without an owner/parent are skipped.
	 *
	 * @param document the document to modify
	 * @param engine the XPath evaluator used to compile the expression
	 * @param expression the XPath expression selecting the nodes to remove
	 */
	private static void removeMatchingNodes(Document document, XPath engine, String expression)
	{
		def found = engine.compile(expression).evaluate(document, XPathConstants.NODESET);
		if (found == null) return;

		// Copy the matches first so that the removals below cannot disturb the
		// iteration, whatever kind of NodeList the XPath implementation returns.
		List matches = []
		for (int i = 0; i < found.getLength(); i++) {
			matches.add(found.item(i))
		}

		for (def node : matches) {
			if (node instanceof org.w3c.dom.Attr) {
				// Attributes have no parent node; they hang off their owner element.
				def owner = node.getOwnerElement()
				if (owner != null) {
					owner.removeAttributeNode(node)
				}
				continue
			}

			def parent = node.getParentNode()
			if (parent != null) {
				parent.removeChild(node)
			}
		}
	}

	/**
	 * Command-line entry point.
	 *
	 * Optional arguments, in order: input XML file, output file, XPath
	 * expression. Any argument that is not supplied falls back to the
	 * historical hard-coded value.
	 *
	 * @param args optional [input file, output file, XPath expression]
	 */
	public static void main(String[] args) {
		String[] defaults = [
			"/home/mdecorde/TXM/corpora/graal/import.xml",
			"/home/mdecorde/TXM/corpora/graal/import-o.xml",
			"//edition[@name='courante']"
		]
		def argOrDefault = { int i -> (args != null && args.length > i) ? args[i] : defaults[i] }

		new RemoveTag(
				new File(argOrDefault(0)),
				new File(argOrDefault(1)),
				argOrDefault(2)
				)
	}
}