Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / swb / SearchReplaceInDirectoryTrace.groovy @ 1000

History | View | Annotate | Download (2.3 kB)

1
package org.txm.scripts.sw
2
// Copyright © - ENS de Lyon - http://textometrie.ens-lyon.fr
3
//
4
// This file is part of the TXM platform.
5
//
6
// The TXM platform is free software: you can redistribute it and/or modify
7
// it under the terms of the GNU General Public License as published by
8
// the Free Software Foundation, either version 3 of the License, or
9
// (at your option) any later version.
10
//
11
// The TXM platform is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
// GNU General Public License for more details.
15
//
16
// You should have received a copy of the GNU General Public License
17
// along with the TXM platform.  If not, see <http://www.gnu.org/licenses/>.
18
//
19
// $LastChangedDate$
20
// $LastChangedRevision$
21
// $LastChangedBy$
22
//
23

    
24
// parameters

dir = new File("/home/sheiden/TXM/sources/Factiva") // directory whose files are processed      **change this parameter**
ext = "\\.xml"                                      // regex fragment the file names must end with **change this parameter**
find = /date="([0-9]+)-([0-9]+-[0-9]+)"/            // regex searched for in every line           **change this parameter**
replaceWith = 'date="$1-$2" year="$1"'              // replacement; $1 and $2 are the groups of 'find' **change this parameter**
encoding = "cp1252"                                 // charset used both to read and to write the files
test = false                                        // when true, the XML syntax of each result file is validated

// main body
// For each matching file of 'dir': check the XML syntax, rewrite every line with
// find/replaceWith into a temporary file, then replace the original with the result.
println "SearchReplaceInDirectory: processing directory '$dir'..."
dir.eachFileMatch(~/.*$ext/) { file ->               // for each file matching extension
    println "SearchReplaceInDirectory: processing file '$file'..."
    println "XML syntax initial check"
    if (!org.txm.importer.ValidateXml.test(file)) {
        println "Skipping file '$file'"
        return                                       // leaves this closure call only: go on with the next file
    }

    println "Search&Replace"
    // The temporary file is created in 'dir', i.e. in the same directory as the file it will replace.
    def tmp = File.createTempFile("SearchReplaceInDirectoryTemp", ".xml", dir)
    try {
        tmp.withWriter(encoding) { writer ->
            file.eachLine(encoding) { line ->                     // for each line
                writer.println line.replaceAll(find, replaceWith) // find&replace and print
            }
        }
        // Save results. The original is only replaced once the temporary file is completely
        // written, and Files.move throws on failure instead of returning an ignored 'false'
        // (the former delete()+renameTo() could lose the original file when the rename failed).
        java.nio.file.Files.move(tmp.toPath(), file.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
    } catch (IOException e) {
        println "** Error: could not rewrite file '$file': ${e}"
        // Do not leave the temporary file behind: it is in 'dir' with an '.xml' suffix,
        // so a later run would process it like a regular source file.
        tmp.delete()
        return
    }

    // The final validation is optional: only announce it when it is actually run.
    if (test) {
        println "XML syntax final check"
        if (!org.txm.importer.ValidateXml.test(file)) {
            println "** Warning: bad XML syntax for result file '$file'"
        }
    }
}