root / tmp / org.txm.core / src / java / org / txm / scripts / importer / INARecords.groovy @ 1688
History | View | Annotate | Download (966 Bytes)
1 | 1000 | mdecorde | package org.txm.scripts.importer
|
---|---|---|---|
2 | 881 | mdecorde | |
3 | 881 | mdecorde | import org.txm.utils.CsvReader; |
4 | 881 | mdecorde | import java.nio.charset.Charset; |
5 | 881 | mdecorde | |
// Scans a ';'-separated, UTF-8 encoded CSV file and reports the columns that
// hold a non-blank value in fewer than `threshold` records, together with the
// distinct (trimmed) values found in each of those columns.

// Columns with fewer non-blank cells than this are reported. Used for both the
// printed banner and the filter so the two cannot drift apart.
int threshold = 30

File csvFile = new File("/home/mdecorde/xml/notices/MARTICE_20130304_TF1_IMAGO_1994-12-31.csv")
CsvReader reader = new CsvReader(csvFile.getAbsolutePath(), ";".charAt(0), Charset.forName("UTF-8"))

try {
	reader.readHeaders()
	def headers = reader.getHeaders()
	int nheader = headers.size()

	int nlines = 0                  // number of data records read
	def counts = new int[nheader]   // counts[i] = non-blank cells seen in column i
	def values = [:]                // header name -> set of distinct trimmed values
	for (String key : headers) values[key] = new HashSet()

	while (reader.readRecord()) {
		nlines++
		for (int i = 0 ; i < nheader ; i++) {
			String key = headers[i]
			// The " " prefix forces string concatenation whatever get() returns;
			// it is removed again by trim().
			String str = (" " + reader.get(key)).trim()
			if (str.length() > 0) {
				counts[i] = counts[i] + 1
				values[key] << str
			}
		}
	}

	println "N lines: "+nlines
	println "Empty cols: N < "+threshold
	for (int i = 0 ; i < nheader ; i++) {
		if (counts[i] < threshold) {
			println ( headers[i]+ " : "+counts[i])
			println values[headers[i]]
		}
	}
} finally {
	// Release the underlying file handle even if reading fails part-way.
	reader.close()
}