Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / swb / SearchReplaceXMLInDirectoryTrace.groovy @ 1000

History | View | Annotate | Download (2.3 kB)

1 1000 mdecorde
package org.txm.scripts.sw
2 321 mdecorde
// Copyright © - ENS de Lyon - http://textometrie.ens-lyon.fr
3 321 mdecorde
//
4 321 mdecorde
// This file is part of the TXM platform.
5 321 mdecorde
//
6 321 mdecorde
// The TXM platform is free software: you can redistribute it and/or modify
7 321 mdecorde
// it under the terms of the GNU General Public License as published by
8 321 mdecorde
// the Free Software Foundation, either version 3 of the License, or
9 321 mdecorde
// (at your option) any later version.
10 321 mdecorde
//
11 321 mdecorde
// The TXM platform is distributed in the hope that it will be useful,
12 321 mdecorde
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13 321 mdecorde
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 321 mdecorde
// GNU General Public License for more details.
15 321 mdecorde
//
16 321 mdecorde
// You should have received a copy of the GNU General Public License
17 321 mdecorde
// along with the TXM platform.  If not, see <http://www.gnu.org/licenses/>.
18 321 mdecorde
//
19 479 mdecorde
// $LastChangedDate: 2013-05-02 10:05:14 +0200 (jeu. 02 mai 2013) $
20 321 mdecorde
// $LastChangedRevision: 2377 $
21 321 mdecorde
// $LastChangedBy: sheiden $
22 321 mdecorde
//
23 321 mdecorde
24 321 mdecorde
// parameters

dir = new File("/home/sheiden/TXM/sources/Factiva") // **change this parameter**
ext = "\\.xml"                                      // **change this parameter**
find = /date="([0-9]+)-([0-9]+-[0-9]+)"/            // **change this parameter**
replaceWith = 'date="$1-$2" year="$1"'              // **change this parameter**

// main body
//
// For each file of 'dir' whose name matches the regular expression ".*" + ext:
//   1. check the file with ValidateXml.test and skip it if the check fails;
//   2. copy it line by line into a temporary file created in 'dir', applying
//      line.replaceAll(find, replaceWith) to every line;
//   3. move the temporary file over the original file;
//   4. check the result with ValidateXml.test and print a warning if it fails.
//
// NOTE(review): lines are read with file.eachLine and written with tmp.withWriter
// without an explicit charset - confirm that the charsets Groovy picks match the
// encoding of the processed XML files.
println "SearchReplaceInDirectory: processing directory '$dir'..."
dir.eachFileMatch(~/.*$ext/){ file ->               // for each file matching extension
    println "SearchReplaceInDirectory: processing file '$file'..."
    println "XML syntax initial check"
    if (org.txm.importer.ValidateXml.test(file)) {
        println "Search&Replace"
        // The temporary file is created (empty) in the same directory as the target file.
        def tmp = File.createTempFile("SearchReplaceInDirectoryTemp", ".xml", dir)
        def saved = false
        try {
            tmp.withWriter { writer ->
                file.eachLine { line ->              // for each line
                    writer.println line.replaceAll(find, replaceWith) // find&replace and print
                }
            }
            try {
                // File.renameTo only returns false on failure and may refuse to overwrite an
                // existing file on some platforms; Files.move replaces the target explicitly
                // and reports the cause of a failure.
                java.nio.file.Files.move(tmp.toPath(), file.toPath(),
                        java.nio.file.StandardCopyOption.REPLACE_EXISTING) // save results
                saved = true
            } catch (IOException e) {
                println "** Error: could not save results to file '$file': $e"
            }
        } finally {
            // Do not leave a stray temporary .xml file in the processed directory.
            if (!saved) {
                tmp.delete()
            }
        }
        if (saved) {
            println "XML syntax final check"
            if (!org.txm.importer.ValidateXml.test(file)) {
                println "** Warning: bad XML syntax for result file '$file'"
            }
        }
    } else {
        println "Skipping file '$file'"
    }
}