Statistics
| Revision:

root / tmp / org.txm.core / src / java / org / txm / importer / CsvToXml.groovy @ 966

History | View | Annotate | Download (5.3 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
//
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
//
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
//
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2015-12-17 12:11:39 +0100 (jeu. 17 déc. 2015) $
25
// $LastChangedRevision: 3087 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.importer
29

    
30
import java.io.FileInputStream
31
import java.io.InputStreamReader
32
import java.nio.charset.Charset
33
import org.txm.utils.*
34

    
35
import javax.xml.stream.*
36
import java.net.URL
37

    
38
/**
39
 * The Class CsvToXml: allow to convert CSV files to XML files
40
 *
41
 * @author mdecorde
42
 */
43
class CsvToXml {

        /**
         * Not read or written by any method of this class (the methods use their
         * own parameter/local of the same name); kept so that existing property
         * access on instances keeps working.
         */
        File xmlfile

        /** Column headers of the CSV file most recently opened by getCSVReader(). */
        List<String> titles = []

        /**
         * Opens a CSV file, reads its header line into {@link #titles} and checks
         * that the required columns are declared.
         *
         * The reader is closed before returning when validation fails or when
         * reading the headers throws, so no file handle is left open on the
         * failure paths.
         *
         * @param csvfile the CSV file to open
         * @param fieldSeparator the field separator; only its first character is used
         * @param encoding the charset name used to decode the file
         * @param requiredColumns header names that must be present (defaults to "text" and "id")
         * @return a reader positioned after the header line, or null if the file
         *         has no column or lacks one of the required columns
         */
        private CsvReader getCSVReader(File csvfile, String fieldSeparator, String encoding, List<String> requiredColumns = ["text", "id"]) {
                CsvReader csvreader = new CsvReader(csvfile.getAbsolutePath(), fieldSeparator.charAt(0), Charset.forName(encoding))
                boolean valid = false
                try {
                        //get titles
                        csvreader.readHeaders()
                        titles = []
                        for (String title : csvreader.getHeaders()) {
                                titles << title
                        }

                        if (titles.size() == 0) {
                                println "the CSV file as no column"
                        } else {
                                // contains() returns a boolean: it must be negated, not compared to 0
                                // (a Boolean is never equal to an Integer in Groovy, so the
                                // former "== 0" test could not detect a missing column)
                                String missing = requiredColumns.find { !titles.contains(it) }
                                if (missing != null) {
                                        println "the CSV file as no '" + missing + "' column"
                                } else {
                                        valid = true
                                }
                        }
                } finally {
                        if (!valid) csvreader.close()
                }

                return valid ? csvreader : null
        }

        /**
         * Writes every column of the current CSV record, except "text", as an
         * attribute of the element currently opened on the writer. Attribute names
         * are the column headers passed through AsciiUtils.buildId().
         *
         * @param writer the XML writer, positioned right after a start element
         * @param csvreader the CSV reader, positioned on the record to export
         */
        private void writeMetadataAttributes(XMLStreamWriter writer, CsvReader csvreader) {
                for (int i = 0 ; i < titles.size() ; i++) {
                        if (titles[i] == "text") continue
                        writer.writeAttribute(AsciiUtils.buildId(titles.get(i)), csvreader.get(i))
                }
        }

        /**
         * Convert a CSV file to ONE XML file using the first line to declare metadata.
         *
         * The result is a "text" root element containing one "div" element per CSV
         * record; the "text" column becomes the div content and every other column
         * becomes a div attribute. Only the "text" column is required here, since
         * the "id" column is not treated specially by this method.
         *
         * @param csvfile the CSV file to read
         * @param xmlfile the XML file to write (UTF-8)
         * @param fieldSeparator the field separator; only its first character is used
         * @param textSeparator the text separator (not used by this implementation)
         * @param encoding the charset name of the CSV file
         * @return true if successful, false if the CSV file has no column or no "text" column
         */
        public boolean toOneXMLFile(File csvfile, File xmlfile, String fieldSeparator, String textSeparator, String encoding) {
                CsvReader csvreader = getCSVReader(csvfile, fieldSeparator, encoding, ["text"])
                if (csvreader == null) return false

                try {
                        println "Metadata properties declared: "+titles

                        XMLOutputFactory factory = XMLOutputFactory.newInstance()
                        FileOutputStream output = new FileOutputStream(xmlfile)
                        try {
                                XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8")

                                writer.writeStartDocument("UTF-8","1.0")
                                writer.writeStartElement("text")
                                while (csvreader.readRecord()) {
                                        writer.writeStartElement("div")
                                        writeMetadataAttributes(writer, csvreader)
                                        writer.writeCharacters(csvreader.get("text")) // get text content
                                        writer.writeEndElement() // div
                                }
                                writer.writeEndElement() // text
                                writer.close()
                        } finally {
                                // XMLStreamWriter.close() does not close the underlying stream
                                output.close()
                        }
                } finally {
                        csvreader.close()
                }

                return true
        }

        /**
         * Convert a CSV file to SEVERAL XML files using the first line to declare metadata.
         *
         * One file named "<id>.xml" is written in outDir per CSV record, where <id>
         * is the value of the record's "id" column. Each file holds a single "text"
         * element whose content is the "text" column and whose attributes are the
         * other columns. Records sharing the same id overwrite each other.
         *
         * NOTE(review): the "id" value is used as a file name without sanitizing;
         * confirm the CSV content is trusted.
         *
         * @param csvfile the CSV file to read
         * @param outDir the directory receiving the XML files; created if missing
         * @param fieldSeparator the field separator; only its first character is used
         * @param textSeparator the text separator (not used by this implementation)
         * @param encoding the charset name of the CSV file
         * @return true if successful, false if the CSV file lacks a required column
         *         or the output directory could not be created
         */
        public boolean toMultipleXMLFiles(File csvfile, File outDir, String fieldSeparator, String textSeparator, String encoding) {
                CsvReader csvreader = getCSVReader(csvfile, fieldSeparator, encoding)
                if (csvreader == null) return false

                try {
                        if (!outDir.exists()) outDir.mkdirs()
                        if (!outDir.exists()) {
                                println "Out directory does not exist and could not create it"
                                return false
                        }

                        XMLOutputFactory factory = XMLOutputFactory.newInstance()
                        while (csvreader.readRecord()) {
                                File recordFile = new File(outDir, csvreader.get("id")+".xml")

                                FileOutputStream output = new FileOutputStream(recordFile)
                                try {
                                        XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8")

                                        writer.writeStartDocument("UTF-8","1.0")
                                        writer.writeStartElement("text")
                                        writeMetadataAttributes(writer, csvreader)
                                        writer.writeCharacters(csvreader.get("text")) // get text content
                                        writer.writeEndElement() // text
                                        writer.close()
                                } finally {
                                        // XMLStreamWriter.close() does not close the underlying stream
                                        output.close()
                                }
                        }
                } finally {
                        // the reader was previously never closed by this method
                        csvreader.close()
                }
                return true
        }

        /**
         * The main method: manual test entry point converting the tab-separated
         * file xml/csv/test.csv of the user home directory, first to one XML file
         * per record in xml/csv/out, then to the single file xml/csv/test.xml.
         *
         * @param args the arguments (ignored)
         */
        static public void main(String[] args)
        {
                String home = System.getProperty("user.home")
                File csvfile = new File(home, "xml/csv/test.csv")
                File outfile = new File(home, "xml/csv/test.xml")
                File outdir = new File(home, "xml/csv/out")

                String fieldSeparator = "\t"
                String textSeparator = "" //or "'"
                CsvToXml builder = new CsvToXml()

                builder.toMultipleXMLFiles(csvfile, outdir, fieldSeparator, textSeparator, "UTF-8")

                builder.toOneXMLFile(csvfile, outfile, fieldSeparator, textSeparator, "UTF-8")
        }
}