root / tmp / org.txm.core / src / java / org / txm / scripts / importer / CsvToXml.groovy @ 2473
History | View | Annotate | Download (5.3 kB)
1 | 881 | mdecorde | // Copyright © 2010-2013 ENS de Lyon.
|
---|---|---|---|
2 | 881 | mdecorde | // Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 | 881 | mdecorde | // Lyon 2, University of Franche-Comté, University of Nice
|
4 | 881 | mdecorde | // Sophia Antipolis, University of Paris 3.
|
5 | 881 | mdecorde | //
|
6 | 881 | mdecorde | // The TXM platform is free software: you can redistribute it
|
7 | 881 | mdecorde | // and/or modify it under the terms of the GNU General Public
|
8 | 881 | mdecorde | // License as published by the Free Software Foundation,
|
9 | 881 | mdecorde | // either version 2 of the License, or (at your option) any
|
10 | 881 | mdecorde | // later version.
|
11 | 881 | mdecorde | //
|
12 | 881 | mdecorde | // The TXM platform is distributed in the hope that it will be
|
13 | 881 | mdecorde | // useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 | 881 | mdecorde | // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 | 881 | mdecorde | // PURPOSE. See the GNU General Public License for more
|
16 | 881 | mdecorde | // details.
|
17 | 881 | mdecorde | //
|
18 | 881 | mdecorde | // You should have received a copy of the GNU General
|
19 | 881 | mdecorde | // Public License along with the TXM platform. If not, see
|
20 | 881 | mdecorde | // http://www.gnu.org/licenses.
|
21 | 881 | mdecorde | //
|
22 | 881 | mdecorde | //
|
23 | 881 | mdecorde | //
|
24 | 881 | mdecorde | // $LastChangedDate: 2015-12-17 12:11:39 +0100 (jeu. 17 déc. 2015) $
|
25 | 881 | mdecorde | // $LastChangedRevision: 3087 $
|
26 | 881 | mdecorde | // $LastChangedBy: mdecorde $
|
27 | 881 | mdecorde | //
|
28 | 1000 | mdecorde | package org.txm.scripts.importer
|
29 | 881 | mdecorde | |
30 | 881 | mdecorde | import java.io.FileInputStream |
31 | 881 | mdecorde | import java.io.InputStreamReader |
32 | 881 | mdecorde | import java.nio.charset.Charset |
33 | 881 | mdecorde | import org.txm.utils.* |
34 | 881 | mdecorde | |
35 | 881 | mdecorde | import javax.xml.stream.* |
36 | 881 | mdecorde | import java.net.URL |
37 | 881 | mdecorde | |
/**
 * Converts CSV files to XML files.
 *
 * The first line of the CSV file declares the column titles. The column
 * named "text" supplies the element content; every other column is written
 * as an XML attribute whose name is built with AsciiUtils.buildId(title).
 * A column named "id" is also required; toMultipleXMLFiles uses its value
 * to name the output files.
 *
 * @author mdecorde
 */
class CsvToXml {

    /** An XML file reference; the methods of this class neither read nor write this field. */
    File xmlfile

    /** The column titles read from the header line of the last CSV file opened. */
    List<String> titles = []

    /**
     * Opens a reader on the CSV file, reads its header line into {@link #titles}
     * and checks that the mandatory columns are declared.
     *
     * @param csvfile the CSV file to read
     * @param fieldSeparator the field separator; only its first character is used
     * @param encoding the name of the charset used to decode the CSV file
     * @return the reader positioned after the header line, or null (after printing
     *         a message and closing the reader) if the file declares no column,
     *         no "text" column or no "id" column
     */
    private CsvReader getCSVReader(File csvfile, String fieldSeparator, String encoding) {
        CsvReader csvreader = new CsvReader(csvfile.getAbsolutePath(), fieldSeparator.charAt(0), Charset.forName(encoding))

        // get titles
        csvreader.readHeaders()
        titles = []
        for (String title : csvreader.getHeaders()) {
            titles << title
        }

        // The previous checks compared a boolean with 0 ("contains(...) == 0"),
        // which is never true in Groovy, so missing columns were not detected.
        String problem = null
        if (titles.isEmpty()) {
            problem = "the CSV file as no column"
        } else if (!titles.contains("text")) {
            problem = "the CSV file as no 'text' column"
        } else if (!titles.contains("id")) {
            problem = "the CSV file as no 'id' column"
        }

        if (problem != null) {
            println problem
            csvreader.close() // the caller only receives null, so release the file here
            return null
        }

        return csvreader
    }

    /**
     * Writes one attribute per CSV column of the current record, skipping the
     * "text" column. Must be called right after a writeStartElement call.
     *
     * @param writer the XML writer positioned on a freshly opened element
     * @param csvreader the reader positioned on the record to export
     */
    private void writeMetadataAttributes(XMLStreamWriter writer, CsvReader csvreader) {
        for (int i = 0 ; i < titles.size() ; i++) {
            if (titles[i] == "text") continue
            writer.writeAttribute(AsciiUtils.buildId(titles.get(i)), csvreader.get(i))
        }
    }

    /**
     * Convert a CSV file to ONE XML file using the first line to declare metadata.
     *
     * The result is a "text" root element containing one "div" element per CSV
     * record.
     *
     * @param csvfile the CSV file to read
     * @param xmlfile the XML file to write (UTF-8)
     * @param fieldSeparator the field separator; only its first character is used
     * @param textSeparator the text separator (currently not used)
     * @param encoding the name of the charset used to decode the CSV file
     * @return true if successful, false if the CSV header line is not valid
     */
    public boolean toOneXMLFile(File csvfile, File xmlfile, String fieldSeparator, String textSeparator, String encoding) {
        CsvReader csvreader = getCSVReader(csvfile, fieldSeparator, encoding)
        if (csvreader == null) return false

        // nested try/finally: every resource is released even if writing fails
        try {
            println "Metadata properties declared: "+titles

            XMLOutputFactory factory = XMLOutputFactory.newInstance()
            FileOutputStream output = new FileOutputStream(xmlfile)
            try {
                XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8")
                try {
                    writer.writeStartDocument("UTF-8","1.0")
                    writer.writeStartElement("text")

                    while (csvreader.readRecord()) {
                        writer.writeStartElement("div")
                        writeMetadataAttributes(writer, csvreader)
                        writer.writeCharacters(csvreader.get("text")) // get text content
                        writer.writeEndElement() // div
                    }

                    writer.writeEndElement() // text
                    writer.writeEndDocument()
                } finally {
                    writer.close() // does not close the underlying stream
                }
            } finally {
                output.close()
            }
        } finally {
            csvreader.close()
        }

        return true
    }

    /**
     * Convert a CSV file to SEVERAL XML files using the first line to declare metadata.
     *
     * One file named "<id>.xml" is written in outDir per CSV record, where <id>
     * is the value of the "id" column. Each file contains a single "text" element.
     *
     * @param csvfile the CSV file to read
     * @param outDir the directory receiving the XML files; created if it does not exist
     * @param fieldSeparator the field separator; only its first character is used
     * @param textSeparator the text separator (currently not used)
     * @param encoding the name of the charset used to decode the CSV file
     * @return true if successful, false if the CSV header line is not valid or
     *         if the output directory does not exist and could not be created
     */
    public boolean toMultipleXMLFiles(File csvfile, File outDir, String fieldSeparator, String textSeparator, String encoding) {
        CsvReader csvreader = getCSVReader(csvfile, fieldSeparator, encoding)
        if (csvreader == null) return false

        // the reader was previously never closed by this method
        try {
            if (!outDir.exists()) outDir.mkdirs()
            if (!outDir.exists()) {
                println "Out directory does not exist and could not create it"
                return false
            }

            XMLOutputFactory factory = XMLOutputFactory.newInstance()

            while (csvreader.readRecord()) {
                // NOTE(review): the id value is used verbatim as a file name; an id
                // containing path separators would resolve outside outDir - confirm
                // that the CSV content is trusted.
                File recordFile = new File(outDir, csvreader.get("id")+".xml")

                FileOutputStream output = new FileOutputStream(recordFile)
                try {
                    XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8")
                    try {
                        writer.writeStartDocument("UTF-8","1.0")
                        writer.writeStartElement("text")

                        writeMetadataAttributes(writer, csvreader)

                        writer.writeCharacters(csvreader.get("text")) // get text content

                        writer.writeEndElement() // text
                        writer.writeEndDocument()
                    } finally {
                        writer.close() // does not close the underlying stream
                    }
                } finally {
                    output.close()
                }
            }
        } finally {
            csvreader.close()
        }

        return true
    }

    /**
     * The main method: converts the sample file "xml/csv/test.csv" of the user
     * home directory, first to several XML files, then to one XML file.
     *
     * @param args the arguments (not used)
     */
    static public void main(String[] args)
    {
        String home = System.getProperty("user.home")
        File csvfile = new File(home, "xml/csv/test.csv")
        File outfile = new File(home, "xml/csv/test.xml")
        File outdir = new File(home, "xml/csv/out")

        String fieldSeparator = "\t"
        String textSeparator = "" //or "'"
        CsvToXml builder = new CsvToXml()

        builder.toMultipleXMLFiles(csvfile, outdir, fieldSeparator, textSeparator, "UTF-8")

        builder.toOneXMLFile(csvfile, outfile, fieldSeparator, textSeparator, "UTF-8")
    }
}