Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / CleanFile.groovy @ 187

History | View | Annotate | Download (3.1 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21

    
22
//
23
// This file is part of the TXM platform.
24
//
25
// The TXM platform is free software: you can redistribute it and/or modify
26
// it under the terms of the GNU General Public License as published by
27
// the Free Software Foundation, either version 3 of the License, or
28
// (at your option) any later version.
29
//
30
// The TXM platform is distributed in the hope that it will be useful,
31
// but WITHOUT ANY WARRANTY; without even the implied warranty of
32
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33
// GNU General Public License for more details.
34
//
35
// You should have received a copy of the GNU General Public License
36
// along with the TXM platform.  If not, see <http://www.gnu.org/licenses/>.
37
// 
38
// 
39
// 
40
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun., 06 mai 2013) $
41
// $LastChangedRevision: 2386 $
42
// $LastChangedBy: mdecorde $ 
43
//
44
package org.txm.importer;
45

    
46
import java.io.BufferedWriter;
47
import java.io.File;
48

    
49
// TODO: Auto-generated Javadoc
50
/**
 * Rewrites a text file in place with tabulations turned into spaces and all
 * control characters removed.
 *
 * The file is read with a caller-supplied encoding and always written back
 * as UTF-8, one "\n"-terminated line per input line.
 */
class CleanFile {

        /** Compiled once: {@link #clean(String)} is called for every line of the file. */
        private static final java.util.regex.Pattern CONTROL_CHARS = java.util.regex.Pattern.compile("\\p{Cntrl}");

        /**
         * Cleans {@code infile} and replaces it with the cleaned copy.
         *
         * The cleaned content is first written to a temporary file located in
         * the same directory as {@code infile}; the original is then deleted
         * and the temporary file renamed to take its place.
         *
         * @param infile the file to process; it is overwritten
         * @param encoding the encoding used to read {@code infile}
         * @throws IOException if the file cannot be read, the copy cannot be
         *         written, or the original cannot be replaced by the copy
         */
        public CleanFile(File infile, String encoding) {
                File outfile = null;
                boolean copied = false;
                InputStream input = new FileInputStream(infile);
                try {
                        BufferedReader reader = new BufferedReader(new InputStreamReader(input, encoding));
                        // Resolve the parent through the absolute path: getParentFile() is null
                        // for a bare relative name, which would put the copy in the system temp
                        // directory and make the final rename fail across filesystems.
                        outfile = File.createTempFile("filecleaner", "123456", infile.getAbsoluteFile().getParentFile());
                        Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
                        try {
                                String line = reader.readLine();
                                while (line != null) {
                                        writer.write(clean(line) + "\n");
                                        line = reader.readLine();
                                }
                        } finally {
                                writer.close();
                        }
                        copied = true;
                } finally {
                        // Closing the underlying stream releases the file handle even when
                        // the reader could not be built (e.g. unsupported encoding).
                        input.close();
                        if (!copied && outfile != null) {
                                outfile.delete(); // do not leave a partial copy behind
                        }
                }

                if (!infile.delete()) { // erase old file
                        outfile.delete(); // original is intact, the copy is useless
                        throw new IOException("Could not delete " + infile);
                }
                if (!outfile.renameTo(infile)) { // rename the copy
                        // The original is gone: keep the copy so that no data is lost.
                        throw new IOException("Could not rename " + outfile + " to " + infile
                                        + "; the cleaned content remains in " + outfile);
                }
        }

        /**
         * Replaces every tabulation by a single space, then removes every
         * remaining control character.
         *
         * @param str the text to clean
         * @return the cleaned text
         */
        public static String clean(String str) {
                // Tabs must be replaced first: '\t' is itself matched by \p{Cntrl}
                // and would otherwise be removed instead of becoming a space.
                str = str.replace("\t", " ");
                return CONTROL_CHARS.matcher(str).replaceAll("");
        }

        /**
         * Command-line entry point.
         *
         * @param args optional: {@code args[0]} is the path of the file to clean
         *        and {@code args[1]} its encoding; the historical test file and
         *        "CP1252" are used when they are omitted
         */
        public static void main(String[] args) {
                String path = args.length > 0 ? args[0] : "C:\\Documents and Settings\\H\\xml\\cleaner\\test.txt";
                String encoding = args.length > 1 ? args[1] : "CP1252";
                new CleanFile(new File(path), encoding);
        }
}