Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / importer / CsvToXml.groovy @ 966

History | View | Annotate | Download (5.3 kB)

1 881 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 881 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 881 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 881 mdecorde
// Sophia Antipolis, University of Paris 3.
5 881 mdecorde
//
6 881 mdecorde
// The TXM platform is free software: you can redistribute it
7 881 mdecorde
// and/or modify it under the terms of the GNU General Public
8 881 mdecorde
// License as published by the Free Software Foundation,
9 881 mdecorde
// either version 2 of the License, or (at your option) any
10 881 mdecorde
// later version.
11 881 mdecorde
//
12 881 mdecorde
// The TXM platform is distributed in the hope that it will be
13 881 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 881 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 881 mdecorde
// PURPOSE. See the GNU General Public License for more
16 881 mdecorde
// details.
17 881 mdecorde
//
18 881 mdecorde
// You should have received a copy of the GNU General
19 881 mdecorde
// Public License along with the TXM platform. If not, see
20 881 mdecorde
// http://www.gnu.org/licenses.
21 881 mdecorde
//
22 881 mdecorde
//
23 881 mdecorde
//
24 881 mdecorde
// $LastChangedDate: 2015-12-17 12:11:39 +0100 (jeu. 17 déc. 2015) $
25 881 mdecorde
// $LastChangedRevision: 3087 $
26 881 mdecorde
// $LastChangedBy: mdecorde $
27 881 mdecorde
//
28 881 mdecorde
package org.txm.importer
29 881 mdecorde
30 881 mdecorde
import java.io.FileInputStream
31 881 mdecorde
import java.io.InputStreamReader
32 881 mdecorde
import java.nio.charset.Charset
33 881 mdecorde
import org.txm.utils.*
34 881 mdecorde
35 881 mdecorde
import javax.xml.stream.*
36 881 mdecorde
import java.net.URL
37 881 mdecorde
38 881 mdecorde
/**
 * Converts a CSV file to XML, either as ONE XML file holding one
 * {@code <div>} element per CSV record, or as SEVERAL XML files (one per
 * CSV record).
 *
 * The first line of the CSV file declares the column names. The "text"
 * column supplies the character content of each record; every other
 * column is written as an XML attribute. Both a "text" and an "id"
 * column are required.
 *
 * @author mdecorde
 */
class CsvToXml {

        /** Not read or written by any method of this class; kept so existing callers that set it still work. */
        File xmlfile

        /** The column titles read from the header line of the most recently opened CSV file. */
        List<String> titles = []

        /**
         * Opens the CSV file, reads its header line into {@link #titles} and
         * checks that the required columns are declared.
         *
         * @param csvfile the CSV file to read
         * @param fieldSeparator the field separator; only its first character is used
         * @param encoding the name of the charset of the CSV file
         * @return the opened reader positioned after the header line, or null
         *         (with the reader closed) if the file declares no column,
         *         no "text" column or no "id" column
         */
        private CsvReader getCSVReader(File csvfile, String fieldSeparator, String encoding) {
                CsvReader csvreader = new CsvReader(csvfile.getAbsolutePath(), fieldSeparator.charAt(0), Charset.forName(encoding))

                boolean valid = false
                try {
                        //get titles
                        csvreader.readHeaders()
                        titles = []
                        for (String title : csvreader.getHeaders()) {
                                titles << title
                        }

                        // The previous checks compared a boolean to 0, which is never
                        // true, so missing columns were never reported.
                        if (titles.isEmpty()) {
                                println "the CSV file as no column"
                        } else if (!titles.contains("text")) {
                                println "the CSV file as no 'text' column"
                        } else if (!titles.contains("id")) {
                                println "the CSV file as no 'id' column"
                        } else {
                                valid = true
                        }
                } finally {
                        // do not leak the reader when the header is rejected or unreadable
                        if (!valid) csvreader.close()
                }

                return valid ? csvreader : null
        }

        /**
         * Writes the current CSV record into the currently open XML element:
         * every column except "text" becomes an attribute (its name is built
         * with AsciiUtils.buildId from the column title), then the "text"
         * column is written as character content.
         *
         * @param writer the XML writer, positioned just after a start element
         * @param csvreader the CSV reader, positioned on the record to write
         */
        private void writeRecord(XMLStreamWriter writer, CsvReader csvreader) {
                for (int i = 0 ; i < titles.size() ; i++) {
                        if (titles[i] == "text") continue
                        writer.writeAttribute(AsciiUtils.buildId(titles.get(i)), csvreader.get(i))
                }

                writer.writeCharacters(csvreader.get("text")) // get text content
        }

        /**
         * Convert a CSV file to ONE XML file using the first line to declare metadata.
         * The result is a {@code <text>} root element containing one
         * {@code <div>} per CSV record.
         *
         * @param csvfile the CSV file to read
         * @param xmlfile the XML file to write
         * @param fieldSeparator the field separator; only its first character is used
         * @param textSeparator the text separator (currently not used)
         * @param encoding the name of the charset of the CSV file
         * @return true, if successful; false if the CSV header is rejected
         */
        public boolean toOneXMLFile(File csvfile, File xmlfile, String fieldSeparator, String textSeparator, String encoding) {
                CsvReader csvreader = getCSVReader(csvfile, fieldSeparator, encoding)
                if (csvreader == null) return false

                try {
                        println "Metadata properties declared: "+titles

                        XMLOutputFactory factory = XMLOutputFactory.newInstance()
                        FileOutputStream output = new FileOutputStream(xmlfile)
                        try {
                                XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8")

                                writer.writeStartDocument("UTF-8","1.0")
                                writer.writeStartElement("text")
                                while (csvreader.readRecord()) {
                                        writer.writeStartElement("div")
                                        writeRecord(writer, csvreader)
                                        writer.writeEndElement() // div
                                }

                                writer.writeEndElement() // text
                                writer.writeEndDocument()
                                writer.close()
                        } finally {
                                // XMLStreamWriter.close() does not close the underlying stream
                                output.close()
                        }
                } finally {
                        csvreader.close()
                }

                return true
        }

        /**
         * Convert a CSV file to SEVERAL XML files using the first line to declare metadata.
         * Each CSV record produces one file named after its "id" column
         * ({@code <id>.xml}) holding a single {@code <text>} element.
         *
         * @param csvfile the CSV file to read
         * @param outDir the directory receiving the XML files; created if it does not exist
         * @param fieldSeparator the field separator; only its first character is used
         * @param textSeparator the text separator (currently not used)
         * @param encoding the name of the charset of the CSV file
         * @return true, if successful; false if the CSV header is rejected or
         *         the output directory cannot be created
         */
        public boolean toMultipleXMLFiles(File csvfile, File outDir, String fieldSeparator, String textSeparator, String encoding) {
                CsvReader csvreader = getCSVReader(csvfile, fieldSeparator, encoding)
                if (csvreader == null) return false

                try {
                        if (!outDir.exists()) outDir.mkdirs()
                        if (!outDir.exists()) {
                                println "Out directory does not exist and could not create it"
                                return false
                        }

                        // the factory does not depend on the record: create it once
                        XMLOutputFactory factory = XMLOutputFactory.newInstance()

                        while (csvreader.readRecord()) {

                                // NOTE(review): the "id" value is used as a file name without any
                                // sanitization; confirm that ids are trusted and unique, otherwise
                                // files may be overwritten or written outside of outDir.
                                File recordFile = new File(outDir, csvreader.get("id")+".xml")

                                FileOutputStream output = new FileOutputStream(recordFile)
                                try {
                                        XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8")

                                        writer.writeStartDocument("UTF-8","1.0")
                                        writer.writeStartElement("text")
                                        writeRecord(writer, csvreader)
                                        writer.writeEndElement() //text
                                        writer.writeEndDocument()
                                        writer.close()
                                } finally {
                                        // XMLStreamWriter.close() does not close the underlying stream
                                        output.close()
                                }
                        }
                } finally {
                        csvreader.close()
                }
                return true
        }

        /**
         * The main method: converts the tab-separated file
         * {@code ~/xml/csv/test.csv} into {@code ~/xml/csv/out/} (one file
         * per record) and then into {@code ~/xml/csv/test.xml} (one file).
         *
         * @param args the arguments (not used)
         */
        static public void main(String[] args)
        {
                String home = System.getProperty("user.home")
                File csvfile = new File(home, "xml/csv/test.csv")
                File outfile = new File(home, "xml/csv/test.xml")
                File outdir = new File(home, "xml/csv/out")

                String fieldSeparator = "\t"
                String textSeparator = "" //or "'"
                CsvToXml builder = new CsvToXml()

                builder.toMultipleXMLFiles(csvfile, outdir, fieldSeparator, textSeparator, "UTF-8")

                builder.toOneXMLFile(csvfile, outfile, fieldSeparator, textSeparator, "UTF-8")
        }
}