root / tmp / org.txm.core / src / java / org / txm / scripts / importer / CsvToXml.groovy @ 2473
History | View | Annotate | Download (5.3 kB)
1 |
// Copyright © 2010-2013 ENS de Lyon.
|
---|---|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice
|
4 |
// Sophia Antipolis, University of Paris 3.
|
5 |
//
|
6 |
// The TXM platform is free software: you can redistribute it
|
7 |
// and/or modify it under the terms of the GNU General Public
|
8 |
// License as published by the Free Software Foundation,
|
9 |
// either version 2 of the License, or (at your option) any
|
10 |
// later version.
|
11 |
//
|
12 |
// The TXM platform is distributed in the hope that it will be
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 |
// PURPOSE. See the GNU General Public License for more
|
16 |
// details.
|
17 |
//
|
18 |
// You should have received a copy of the GNU General
|
19 |
// Public License along with the TXM platform. If not, see
|
20 |
// http://www.gnu.org/licenses.
|
21 |
//
|
22 |
//
|
23 |
//
|
24 |
// $LastChangedDate: 2015-12-17 12:11:39 +0100 (jeu. 17 déc. 2015) $
|
25 |
// $LastChangedRevision: 3087 $
|
26 |
// $LastChangedBy: mdecorde $
|
27 |
//
|
28 |
package org.txm.scripts.importer
|
29 |
|
30 |
import java.io.FileInputStream |
31 |
import java.io.InputStreamReader |
32 |
import java.nio.charset.Charset |
33 |
import org.txm.utils.* |
34 |
|
35 |
import javax.xml.stream.* |
36 |
import java.net.URL |
37 |
|
38 |
/**
|
39 |
* CsvToXml: converts a CSV file to XML, either as one XML file for the whole CSV file or as one XML file per CSV record.
|
40 |
*
|
41 |
* @author mdecorde
|
42 |
*/
|
43 |
class CsvToXml {

	/**
	 * NOTE(review): this property is not read or assigned by any method of this class
	 * (the methods below use a parameter of the same name); kept for compatibility.
	 */
	File xmlfile

	/** The column titles read from the header line of the last CSV file opened. */
	List<String> titles = []

	/**
	 * Opens the CSV file, reads its header line into {@link #titles} and checks
	 * that the mandatory "text" and "id" columns are declared.
	 *
	 * @param csvfile the CSV file to read
	 * @param fieldSeparator the field separator; only its first character is used
	 * @param encoding the name of the charset used to decode the CSV file
	 * @return the reader, positioned after the header line, or null (after printing
	 *         a message) if the file declares no column, no "text" column or no "id" column.
	 *         The reader is closed before null is returned.
	 */
	private CsvReader getCSVReader(File csvfile, String fieldSeparator, String encoding) {
		CsvReader csvreader = new CsvReader(csvfile.getAbsolutePath(), fieldSeparator.charAt(0), Charset.forName(encoding))

		// set to true only when the reader is handed over to the caller
		boolean usable = false
		try {
			//get titles
			csvreader.readHeaders()
			titles = []
			for (String title : csvreader.getHeaders()) {
				titles << title
			}

			if (titles.size() == 0) {
				println "the CSV file as no column"
				return null
			}

			// contains() returns a boolean: it must be negated, comparing it with 0 never matches
			if (!titles.contains("text")) {
				println "the CSV file as no 'text' column"
				return null
			}

			if (!titles.contains("id")) {
				println "the CSV file as no 'id' column"
				return null
			}

			usable = true
			return csvreader
		} finally {
			// do not leak the file handle when the file is rejected or reading the headers failed
			if (!usable) csvreader.close()
		}
	}

	/**
	 * Writes every column of the current CSV record, except the "text" column,
	 * as an attribute of the element currently opened in the writer.
	 * The attribute name is the column title passed through AsciiUtils.buildId.
	 *
	 * @param writer the XML writer, with a start element just written
	 * @param csvreader the CSV reader, positioned on the record to export
	 */
	private void writeMetadataAttributes(XMLStreamWriter writer, CsvReader csvreader) {
		for (int i = 0 ; i < titles.size() ; i++) {
			if (titles[i] == "text") continue
			writer.writeAttribute(AsciiUtils.buildId(titles.get(i)), csvreader.get(i))
		}
	}

	/**
	 * Convert a CSV file to ONE XML file using the first line to declare metadata.
	 *
	 * The result is a "text" root element containing one "div" element per CSV record;
	 * each "div" carries the record columns as attributes and the "text" column as content.
	 * I/O and XML stream errors are not caught and propagate to the caller.
	 *
	 * @param csvfile the csvfile
	 * @param xmlfile the XML file to write
	 * @param fieldSeparator the field separator
	 * @param textSeparator the text separator (currently not used)
	 * @param encoding the encoding of the CSV file
	 * @return true, if successful; false if the CSV header line is not valid
	 */
	public boolean toOneXMLFile(File csvfile, File xmlfile, String fieldSeparator, String textSeparator, String encoding) {
		CsvReader csvreader = getCSVReader(csvfile, fieldSeparator, encoding)
		if (csvreader == null) return false

		try {
			println "Metadata properties declared: "+titles

			XMLOutputFactory factory = XMLOutputFactory.newInstance()
			FileOutputStream output = new FileOutputStream(xmlfile)
			try {
				XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8")
				try {
					writer.writeStartDocument("UTF-8","1.0")
					writer.writeStartElement("text")
					while (csvreader.readRecord()) {
						writer.writeStartElement("div")
						writeMetadataAttributes(writer, csvreader)
						writer.writeCharacters(csvreader.get("text")) // get text content
						writer.writeEndElement() // div
					}
					writer.writeEndElement() // text
				} finally {
					writer.close()
				}
			} finally {
				// closing the XMLStreamWriter does not close the underlying stream
				output.close()
			}
		} finally {
			csvreader.close()
		}

		return true
	}

	/**
	 * Convert a CSV file to SEVERAL XML files using the first line to declare metadata.
	 *
	 * One XML file named after the "id" column value (plus ".xml") is written per CSV record;
	 * its "text" root element carries the record columns as attributes and the "text" column as content.
	 * I/O and XML stream errors are not caught and propagate to the caller.
	 *
	 * @param csvfile the csvfile
	 * @param outDir the directory receiving the XML files; created if it does not exist
	 * @param fieldSeparator the field separator
	 * @param textSeparator the text separator (currently not used)
	 * @param encoding the encoding of the CSV file
	 * @return true, if successful; false if the CSV header line is not valid or if the
	 *         output directory does not exist and could not be created
	 */
	public boolean toMultipleXMLFiles(File csvfile, File outDir, String fieldSeparator, String textSeparator, String encoding) {
		CsvReader csvreader = getCSVReader(csvfile, fieldSeparator, encoding)
		if (csvreader == null) return false

		try {
			if (!outDir.exists()) outDir.mkdirs()
			if (!outDir.exists()) {
				println "Out directory does not exist and could not create it"
				return false
			}

			XMLOutputFactory factory = XMLOutputFactory.newInstance()

			while (csvreader.readRecord()) {
				// NOTE(review): the "id" value is used as a file name without any check:
				// an empty id, a duplicated id or an id containing path separators is not detected here.
				File xmlfile = new File(outDir, csvreader.get("id")+".xml")

				FileOutputStream output = new FileOutputStream(xmlfile)
				try {
					XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8")
					try {
						writer.writeStartDocument("UTF-8","1.0")
						writer.writeStartElement("text")
						writeMetadataAttributes(writer, csvreader)
						writer.writeCharacters(csvreader.get("text")) // get text content
						writer.writeEndElement() // text
					} finally {
						writer.close()
					}
				} finally {
					// closing the XMLStreamWriter does not close the underlying stream
					output.close()
				}
			}
		} finally {
			csvreader.close()
		}
		return true
	}

	/**
	 * The main method: converts the test file "xml/csv/test.csv" of the user home directory,
	 * first to several XML files, then to one XML file.
	 *
	 * @param args the arguments (not used)
	 */
	static public void main(String[] args)
	{
		String home = System.getProperty("user.home")
		File csvfile = new File(home, "xml/csv/test.csv")
		File outfile = new File(home, "xml/csv/test.xml")
		File outdir = new File(home, "xml/csv/out")

		String fieldSeparator = "\t"
		String textSeparator = "" //or "'"
		CsvToXml builder = new CsvToXml()

		builder.toMultipleXMLFiles(csvfile, outdir, fieldSeparator, textSeparator, "UTF-8")

		builder.toOneXMLFile(csvfile, outfile, fieldSeparator, textSeparator, "UTF-8")
	}
}