Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / sw / SearchReplaceXMLInDirectoryTrace.groovy @ 479

History | View | Annotate | Download (2.3 kB)

1
package org.txm.sw
2
// Copyright © - ENS de Lyon - http://textometrie.ens-lyon.fr
3
//
4
// This file is part of the TXM platform.
5
//
6
// The TXM platform is free software: you can redistribute it and/or modify
7
// it under the terms of the GNU General Public License as published by
8
// the Free Software Foundation, either version 3 of the License, or
9
// (at your option) any later version.
10
//
11
// The TXM platform is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
// GNU General Public License for more details.
15
//
16
// You should have received a copy of the GNU General Public License
17
// along with the TXM platform.  If not, see <http://www.gnu.org/licenses/>.
18
//
19
// $LastChangedDate: 2013-05-02 10:05:14 +0200 (jeu. 02 mai 2013) $
20
// $LastChangedRevision: 2377 $
21
// $LastChangedBy: sheiden $
22
//
23

    
24
// parameters
//
// Script-level settings: edit the values below before running the script.

dir = new File("/home/sheiden/TXM/sources/Factiva") // **change this parameter** directory whose files are processed (sub-directories are not visited)
ext = "\\.xml"                                      // **change this parameter** regex fragment the end of a file name must match
find = /date="([0-9]+)-([0-9]+-[0-9]+)"/            // **change this parameter** regular expression searched in every line
replaceWith = 'date="$1-$2" year="$1"'              // **change this parameter** replacement text; $1, $2 refer to the groups of 'find'
encoding = java.nio.charset.Charset.defaultCharset().name() // charset used to read and write the files; defaults to the platform charset (set it to e.g. "UTF-8" if the files require it)

// main body
//
// For every file of 'dir' whose name matches '.*' followed by 'ext':
//   1. validate the file with org.txm.importer.ValidateXml.test and skip it when the test fails,
//   2. write a copy with 'find' replaced by 'replaceWith' on each line into a temporary file,
//   3. move the temporary file over the original,
//   4. validate the result and print a warning when the test fails.
println "SearchReplaceInDirectory: processing directory '$dir'..."
dir.eachFileMatch(~/.*$ext/){ file ->               // for each file matching extension
    println "SearchReplaceInDirectory: processing file '$file'..."
    println "XML syntax initial check"
    if (org.txm.importer.ValidateXml.test(file)) {
        println "Search&Replace"
        // createTempFile already creates an empty file; it lives in 'dir' so that the
        // final move stays within the same directory
        def tmp = File.createTempFile("SearchReplaceInDirectoryTemp", ".xml", dir)
        def saved = false
        try {
            tmp.withWriter(encoding) { writer ->
                file.eachLine(encoding) { line ->    // for each line
                    writer.println line.replaceAll(find, replaceWith) // find&replace and print
                }
            }
            try {
                // File.renameTo only returns false on failure, which was silently ignored before;
                // Files.move replaces the existing file and reports a failure as an exception
                java.nio.file.Files.move(tmp.toPath(), file.toPath(),
                        java.nio.file.StandardCopyOption.REPLACE_EXISTING) // save results
                saved = true
            } catch (IOException e) {
                println "** Error: could not save results to file '$file': $e"
            }
        } finally {
            // never leave the temporary file behind: its name matches the file filter
            // and it would be processed as an input file by a later run
            if (!saved) {
                tmp.delete()
            }
        }
        if (saved) {
            println "XML syntax final check"
            if (!org.txm.importer.ValidateXml.test(file)) {
                println "** Warning: bad XML syntax for result file '$file'"
            }
        }
    } else {
        println "Skipping file '$file'"
    }
}