Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / scripts / importer / INARecords.groovy @ 2473

History | View | Annotate | Download (966 Bytes)

1 1000 mdecorde
package org.txm.scripts.importer
2 881 mdecorde
3 881 mdecorde
import org.txm.utils.CsvReader;
4 881 mdecorde
import java.nio.charset.Charset;
5 881 mdecorde
6 881 mdecorde
// Input: a semicolon-separated notices export, decoded as UTF-8.
File csvFile = new File("/home/mdecorde/xml/notices/MARTICE_20130304_TF1_IMAGO_1994-12-31.csv")
CsvReader reader = new CsvReader(
        csvFile.getAbsolutePath(),
        ';' as char,
        Charset.forName("UTF-8"))

// Consume the first record as the header row; column names drive everything below.
reader.readHeaders()
def headers = reader.getHeaders()

// Bookkeeping shared with the rest of the script:
//   nheader - number of columns in the header row
//   nlines  - number of data records read so far
//   counts  - one int slot per column, in header order
//   values  - header name -> set (initially empty) of values collected for that column
int nheader = headers.size()
int nlines = 0
def counts = new int[nheader]
def values = [:]
headers.each { String name -> values[name] = new HashSet() }
16 881 mdecorde
17 881 mdecorde
// Scan every data record: for each column, count the non-blank cells and
// collect the distinct trimmed values seen in that column.
// The reader is closed in a finally block so the underlying file handle is
// released even if a record fails to parse.
try {
        while (reader.readRecord()) {
                nlines++
                for (int i = 0 ; i < nheader ; i++) {
                        String key = headers[i]
                        // Trim once; the " " prefix from the original expression is kept
                        // so the concatenation result is unchanged before trimming.
                        String str = (" " + reader.get(key)).trim()
                        if (str.length() > 0) {
                                counts[i] = counts[i] + 1
                                values[key] << str
                        }
                }
        }
} finally {
        reader.close()
}
30 881 mdecorde
31 881 mdecorde
// Report: total number of records, then every column whose non-blank cell
// count is below the threshold, followed by the distinct values seen in it.
println "N lines: "+nlines

// Single source of truth for the cut-off, so the printed legend always
// matches the filter actually applied (the legend previously said "<= 20"
// while the test was "< 30").
int threshold = 30
println "Empty cols: N < "+threshold
for (int i = 0 ; i < nheader ; i++) {
        if (counts[i] < threshold) {
                println ( headers[i]+ " : "+counts[i])
                println values[headers[i]]
        }
}