root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / swb / SearchReplaceXMLInDirectoryTrace.groovy @ 1000
History | View | Annotate | Download (2.3 kB)
1 | 1000 | mdecorde | package org.txm.scripts.sw
|
---|---|---|---|
2 | 321 | mdecorde | // Copyright © - ENS de Lyon - http://textometrie.ens-lyon.fr
|
3 | 321 | mdecorde | //
|
4 | 321 | mdecorde | // This file is part of the TXM platform.
|
5 | 321 | mdecorde | //
|
6 | 321 | mdecorde | // The TXM platform is free software: you can redistribute it and/or modify
|
7 | 321 | mdecorde | // it under the terms of the GNU General Public License as published by
|
8 | 321 | mdecorde | // the Free Software Foundation, either version 3 of the License, or
|
9 | 321 | mdecorde | // (at your option) any later version.
|
10 | 321 | mdecorde | //
|
11 | 321 | mdecorde | // The TXM platform is distributed in the hope that it will be useful,
|
12 | 321 | mdecorde | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 | 321 | mdecorde | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 | 321 | mdecorde | // GNU General Public License for more details.
|
15 | 321 | mdecorde | //
|
16 | 321 | mdecorde | // You should have received a copy of the GNU General Public License
|
17 | 321 | mdecorde | // along with the TXM platform. If not, see <http://www.gnu.org/licenses/>.
|
18 | 321 | mdecorde | //
|
19 | 479 | mdecorde | // $LastChangedDate: 2013-05-02 10:05:14 +0200 (jeu. 02 mai 2013) $
|
20 | 321 | mdecorde | // $LastChangedRevision: 2377 $
|
21 | 321 | mdecorde | // $LastChangedBy: sheiden $
|
22 | 321 | mdecorde | //
|
23 | 321 | mdecorde | |
// parameters

dir = new File("/home/sheiden/TXM/sources/Factiva") // **change this parameter** (directory whose files are processed)
ext = "\\.xml" // **change this parameter** (regex that file names must end with)
find = /date="([0-9]+)-([0-9]+-[0-9]+)"/ // **change this parameter** (regex searched in every line)
replaceWith = 'date="$1-$2" year="$1"' // **change this parameter** (replacement; $1, $2... refer to groups of 'find')
encoding = "UTF-8" // **change this parameter** (charset used both to read and to rewrite the files)

// main body
//
// For every regular file of 'dir' whose name matches 'ext':
//   1. check the XML syntax with ValidateXml.test and skip the file if the check fails,
//   2. write a copy with 'find' replaced by 'replaceWith' on each line to a temporary file,
//   3. replace the original file by the temporary file,
//   4. check the XML syntax of the result and print a warning if the check fails.

// Compile the search expression once instead of once per line.
def pattern = (find instanceof java.util.regex.Pattern) ? find : java.util.regex.Pattern.compile(find.toString())

println "SearchReplaceInDirectory: processing directory '$dir'..."

// FileType.FILES: do not hand sub-directories whose name happens to match to the closure.
dir.eachFileMatch(groovy.io.FileType.FILES, ~/.*$ext/) { file -> // for each file matching extension
	println "SearchReplaceInDirectory: processing file '$file'..."

	println "XML syntax initial check"
	if (!org.txm.importer.ValidateXml.test(file)) {
		println "Skipping file '$file'"
		return // leaves the closure only: continue with the next file
	}

	println "Search&Replace"
	// The temporary file lives in 'dir' so the final move stays on the same file system.
	// Its suffix is deliberately not ".xml": a leftover must not be picked up as a source file by a later run.
	def tmp = File.createTempFile("SearchReplaceInDirectoryTemp", ".tmp", dir) // create temporary (empty) file
	try {
		tmp.withWriter(encoding) { writer ->
			file.eachLine(encoding) { line -> // for each line
				writer.writeLine(pattern.matcher(line).replaceAll(replaceWith)) // find&replace and print
			}
		}
		// save results; unlike File.renameTo, Files.move replaces an existing target on every
		// platform and throws instead of failing silently
		java.nio.file.Files.move(tmp.toPath(), file.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
	} catch (IOException e) {
		// the original file has not been replaced: report and go on with the next file
		println "** Error: could not rewrite file '$file': $e"
		return
	} finally {
		// only still present when something went wrong before or during the move
		if (tmp.exists()) {
			tmp.delete()
		}
	}

	println "XML syntax final check"
	if (!org.txm.importer.ValidateXml.test(file)) {
		println "** Warning: bad XML syntax for result file '$file'"
	}
}