root / tmp / org.txm.core / src / java / org / txm / scripts / importer / INARecords.groovy @ 2473
History | View | Annotate | Download (966 Bytes)
1 |
package org.txm.scripts.importer
|
---|---|
2 |
|
3 |
import org.txm.utils.CsvReader; |
4 |
import java.nio.charset.Charset; |
5 |
|
6 |
// Column-fill report for an INA notice CSV export.
// Reads the ';'-separated, UTF-8 encoded file below, counts for every header how many
// records carry a non-blank value, collects the distinct values seen per header, and
// finally prints the headers that are filled in fewer than `sparseThreshold` records.
File csvFile = new File("/home/mdecorde/xml/notices/MARTICE_20130304_TF1_IMAGO_1994-12-31.csv")

// A column is reported when its number of non-blank cells is strictly below this value.
// The banner printed below is derived from it so the label and the filter cannot drift apart
// (the previous banner announced "N <= 20" while the filter tested "< 30").
int sparseThreshold = 30

CsvReader reader = new CsvReader(csvFile.getAbsolutePath(), ";".charAt(0), Charset.forName("UTF-8"))

def headers        // header names, in file order
int nheader = 0    // number of headers
int nlines = 0     // number of data records read (header row excluded)
int[] counts       // counts[i] = number of records with a non-blank value for headers[i]
def values = [:]   // header name -> set of distinct trimmed non-blank values

try {
    reader.readHeaders()
    headers = reader.getHeaders()
    nheader = headers.size()
    counts = new int[nheader]
    for (String key : headers) {
        values[key] = new HashSet()
    }

    while (reader.readRecord()) {
        nlines++
        for (int i = 0; i < nheader; i++) {
            String key = headers[i]
            // String concatenation keeps this null-tolerant, exactly as the original " " + value did.
            String cell = ("" + reader.get(key)).trim()
            if (cell.length() > 0) {
                counts[i] = counts[i] + 1
                values[key] << cell
            }
        }
    }
} finally {
    // NOTE(review): assumes org.txm.utils.CsvReader exposes close() like the JavaCSV
    // CsvReader it appears to mirror - confirm against the class.
    reader.close()
}

println "N lines: " + nlines
println "Empty cols: N < " + sparseThreshold
for (int i = 0; i < nheader; i++) {
    if (counts[i] < sparseThreshold) {
        println(headers[i] + " : " + counts[i])
        println values[headers[i]]
    }
}