Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / importer / INARecords.groovy @ 187

History | View | Annotate | Download (958 Bytes)

1
package org.txm.importer
2

    
3
import org.txm.utils.CsvReader;
4
import java.nio.charset.Charset;
5

    
6
// Column profiler for a semicolon-separated, UTF-8 CSV file.
// For every column it counts the records holding a non-blank value and
// collects the distinct trimmed values, then prints the columns that are
// filled in fewer than `threshold` records together with those values.
File csvFile = new File("/home/mdecorde/xml/notices/MARTICE_20130304_TF1_IMAGO_1994-12-31.csv")
CsvReader reader = new CsvReader(csvFile.getAbsolutePath(), ";".charAt(0), Charset.forName("UTF-8"))

// A column is reported when it is non-blank in fewer than this many records.
// The printed heading is built from this value so both always agree.
int threshold = 30

int nlines = 0      // number of data records read (header line excluded)
def headers         // column names, in file order
int nheader = 0     // number of columns
def counts          // counts[i] = records where column i is non-blank
def values = [:]    // column name -> set of distinct trimmed values

try {
    reader.readHeaders()
    headers = reader.getHeaders()
    nheader = headers.size()
    counts = new int[nheader]
    for (String key : headers) {
        values[key] = new HashSet()
    }

    while (reader.readRecord()) {
        nlines++
        for (int i = 0; i < nheader; i++) {
            String key = headers[i]
            // Trim once; a null or whitespace-only cell counts as empty.
            String cell = reader.get(key)?.trim()
            if (cell) {
                counts[i] = counts[i] + 1
                values[key] << cell
            }
        }
    }
} finally {
    // Release the underlying file even when reading fails part-way.
    // NOTE(review): assumes org.txm.utils.CsvReader exposes close() like the
    // JavaCSV reader it resembles - confirm.
    reader.close()
}

println "N lines: " + nlines
println "Empty cols: N < " + threshold
for (int i = 0; i < nheader; i++) {
    if (counts[i] < threshold) {
        println(headers[i] + " : " + counts[i])
        println values[headers[i]]
    }
}