27 |
27 |
//
|
28 |
28 |
package org.txm.metadatas;
|
29 |
29 |
|
30 |
|
import groovy.util.Node;
|
31 |
|
|
32 |
30 |
import java.io.BufferedWriter;
|
33 |
31 |
import java.io.File;
|
34 |
|
import java.io.FileNotFoundException;
|
35 |
32 |
import java.io.FileOutputStream;
|
36 |
33 |
import java.io.IOException;
|
37 |
34 |
import java.io.OutputStreamWriter;
|
... | ... | |
62 |
59 |
import javax.xml.xpath.XPathExpressionException;
|
63 |
60 |
import javax.xml.xpath.XPathFactory;
|
64 |
61 |
|
65 |
|
import org.odftoolkit.odfdom.dom.element.table.TableTableElement;
|
66 |
|
import org.odftoolkit.simple.SpreadsheetDocument;
|
67 |
|
import org.odftoolkit.simple.table.Row;
|
68 |
|
import org.odftoolkit.simple.table.Table;
|
69 |
62 |
import org.txm.importer.AddAttributeInXml;
|
70 |
63 |
import org.txm.importer.PersonalNamespaceContext;
|
71 |
64 |
import org.txm.libs.msoffice.ReadExcel;
|
... | ... | |
79 |
72 |
import org.w3c.dom.Element;
|
80 |
73 |
import org.w3c.dom.NodeList;
|
81 |
74 |
|
82 |
|
// TODO: Auto-generated Javadoc
|
|
75 |
import groovy.util.Node;
|
|
76 |
|
83 |
77 |
/**
|
84 |
78 |
* The Class Metadatas.
|
85 |
79 |
*/
|
86 |
|
public class Metadatas extends LinkedHashMap<String , TextInjection> {
|
87 |
|
|
88 |
|
/** Load metadatas from a xml file $lt;metadatas> $lt;meta key="key1"> $lt;entry name="attr1" value="value1"/> $lt;entry name="attr1" value="value1"/> $lt;meta/> $lt;/metadatas>. */
|
|
80 |
public class Metadatas extends LinkedHashMap<String, TextInjection> {
|
|
81 |
|
|
82 |
/**
 * Load metadatas from an XML file &lt;metadatas> &lt;meta key="key1"> &lt;entry
 * name="attr1" value="value1"/> &lt;entry name="attr1" value="value1"/>
 * &lt;/meta> &lt;/metadatas>.
 */
|
89 |
87 |
File xmlfile;
|
90 |
|
|
|
88 |
|
91 |
89 |
/** The metadatas. */
|
92 |
90 |
ArrayList<Metadata> metadatas = new ArrayList<Metadata>();
|
93 |
|
|
|
91 |
|
94 |
92 |
/** The headers list. */
|
95 |
93 |
ArrayList<String> headersList = new ArrayList<String>();
|
96 |
|
|
|
94 |
|
97 |
95 |
/** Set to true once the metadata file has been loaded. */
|
98 |
96 |
boolean isInitialize = false;
|
99 |
|
|
|
97 |
|
100 |
98 |
/**
|
101 |
99 |
* Checks if is initialized.
|
102 |
100 |
*
|
103 |
101 |
* @return true, if is initialized
|
104 |
102 |
*/
|
105 |
|
public boolean isInitialized()
|
106 |
|
{
|
|
103 |
public boolean isInitialized() {
|
|
104 |
|
107 |
105 |
return isInitialize;
|
108 |
106 |
}
|
109 |
|
|
|
107 |
|
110 |
108 |
/** The ns context. */
|
111 |
109 |
NamespaceContext nsContext = new PersonalNamespaceContext();
|
112 |
|
|
|
110 |
|
113 |
111 |
/**
|
114 |
112 |
* Instantiates a new metadatas.
|
115 |
113 |
*
|
116 |
114 |
* @param xmlfile the xmlfile
|
117 |
115 |
*/
|
118 |
|
public Metadatas(File xmlfile)
|
119 |
|
{
|
|
116 |
public Metadatas(File xmlfile) {
|
|
117 |
|
120 |
118 |
this.xmlfile = xmlfile;
|
121 |
|
if(!initialize())
|
122 |
|
System.out.println("Error: failed to load metadata from the file "+xmlfile);
|
|
119 |
if (!initialize())
|
|
120 |
System.out.println("Error: failed to load metadata from the file " + xmlfile);
|
123 |
121 |
}
|
124 |
|
|
|
122 |
|
125 |
123 |
/**
|
126 |
124 |
* Find a metadata file in a directory
|
127 |
125 |
*
|
128 |
126 |
* @param directory
|
129 |
|
* @return an input file, try in order : ods, xlsx, tsv and finally csv extension
|
|
127 |
* @return an input file, try in order : ods, xlsx, tsv and finally csv
|
|
128 |
* extension
|
130 |
129 |
*/
|
131 |
130 |
public static File findMetadataFile(File directory) {
|
|
131 |
|
132 |
132 |
File f = new File(directory, "metadata.ods");
|
133 |
133 |
if (f.exists()) return f;
|
134 |
134 |
|
... | ... | |
138 |
138 |
f = new File(directory, "metadata.tsv");
|
139 |
139 |
if (f.exists()) return f;
|
140 |
140 |
|
141 |
|
return new File(directory, "metadata.csv");
|
|
141 |
return new File(directory, "metadata.csv");
|
142 |
142 |
}
|
143 |
143 |
|
144 |
144 |
/**
|
... | ... | |
149 |
149 |
* @param separator the separator
|
150 |
150 |
* @param nbheaderline the nbheaderline
|
151 |
151 |
*/
|
152 |
|
public Metadatas(File inputFile, String encoding, String separator, String txtseparator, int nbheaderline)
|
153 |
|
{
|
154 |
|
File xmlfile = new File(inputFile.getParent(), inputFile.getName()+".xml");
|
155 |
|
//println "create xml file version of "+csvfile+" : "+xmlfile
|
|
152 |
public Metadatas(File inputFile, String encoding, String separator, String txtseparator, int nbheaderline) {
|
|
153 |
|
|
154 |
File xmlfile = new File(inputFile.getParent(), inputFile.getName() + ".xml");
|
|
155 |
// println "create xml file version of "+csvfile+" : "+xmlfile
|
156 |
156 |
try {
|
157 |
157 |
if (inputFile.getName().endsWith(".ods")) {
|
158 |
|
if (convertODSToXml(inputFile, xmlfile)) {
|
|
158 |
if (convertODSToXml(inputFile, xmlfile)) {
|
159 |
159 |
this.xmlfile = xmlfile;
|
160 |
|
//println "xml file : "+xmlfile
|
161 |
|
|
|
160 |
// println "xml file : "+xmlfile
|
|
161 |
|
162 |
162 |
}
|
163 |
|
} else if (inputFile.getName().endsWith(".xlsx")) {
|
164 |
|
if (convertXLSXToXml(inputFile, xmlfile)) {
|
|
163 |
}
|
|
164 |
else if (inputFile.getName().endsWith(".xlsx")) {
|
|
165 |
if (convertXLSXToXml(inputFile, xmlfile)) {
|
165 |
166 |
this.xmlfile = xmlfile;
|
166 |
|
//println "xml file : "+xmlfile
|
167 |
|
|
|
167 |
// println "xml file : "+xmlfile
|
|
168 |
|
168 |
169 |
}
|
169 |
|
} else if (inputFile.getName().endsWith(".tsv")) {
|
170 |
|
if (convertCsvToXml(inputFile, xmlfile, encoding, "\t", "", 0)) {
|
|
170 |
}
|
|
171 |
else if (inputFile.getName().endsWith(".tsv")) {
|
|
172 |
if (convertCsvToXml(inputFile, xmlfile, encoding, "\t", "", 0)) {
|
171 |
173 |
this.xmlfile = xmlfile;
|
172 |
|
//println "xml file : "+xmlfile
|
173 |
|
|
|
174 |
// println "xml file : "+xmlfile
|
|
175 |
|
174 |
176 |
}
|
175 |
|
} else {
|
176 |
|
if (convertCsvToXml(inputFile, xmlfile, encoding, separator, txtseparator, nbheaderline)) {
|
|
177 |
}
|
|
178 |
else {
|
|
179 |
if (convertCsvToXml(inputFile, xmlfile, encoding, separator, txtseparator, nbheaderline)) {
|
177 |
180 |
this.xmlfile = xmlfile;
|
178 |
|
//println "xml file : "+xmlfile
|
179 |
|
|
|
181 |
// println "xml file : "+xmlfile
|
|
182 |
|
180 |
183 |
}
|
181 |
184 |
}
|
182 |
|
|
|
185 |
|
183 |
186 |
if (!initialize()) {
|
184 |
|
System.out.println("Error: failed to load metadata from the file "+xmlfile);
|
|
187 |
System.out.println("Error: failed to load metadata from the file " + xmlfile);
|
185 |
188 |
}
|
186 |
|
} catch (Exception e) {
|
|
189 |
}
|
|
190 |
catch (Exception e) {
|
187 |
191 |
// TODO Auto-generated catch block
|
188 |
192 |
org.txm.utils.logger.Log.printStackTrace(e);
|
189 |
193 |
Log.severe(e.toString());
|
190 |
194 |
}
|
191 |
195 |
}
|
192 |
|
|
|
196 |
|
193 |
197 |
private boolean convertXLSXToXml(File inputFile, File xmlFile) throws Exception {
|
|
198 |
|
194 |
199 |
ArrayList<ArrayList<String>> data = ReadExcel.toTable(inputFile, "metadata");
|
195 |
|
System.out.println(data);
|
196 |
200 |
return data.size() > 0 && convertTableToXml(data, xmlFile);
|
197 |
201 |
}
|
198 |
|
|
|
202 |
|
199 |
203 |
private boolean convertTableToXml(ArrayList<ArrayList<String>> data, File xmlFile) throws Exception {
|
|
204 |
|
200 |
205 |
XMLOutputFactory factory = XMLOutputFactory.newInstance();
|
201 |
206 |
FileOutputStream output = new FileOutputStream(xmlFile);
|
202 |
|
XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8");//create a new file
|
203 |
|
|
|
207 |
XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8");// create a new file
|
|
208 |
|
204 |
209 |
writer.writeStartDocument("UTF-8", "1.0");
|
205 |
210 |
writer.writeCharacters("\n");
|
206 |
211 |
writer.writeStartElement("enrichissement");
|
207 |
212 |
writer.writeCharacters("\n");
|
208 |
213 |
writer.writeStartElement("metadatas");
|
209 |
214 |
writer.writeCharacters("\n");
|
210 |
|
|
|
215 |
|
211 |
216 |
ArrayList<String> headers = data.get(0); // first line
|
212 |
|
|
213 |
|
for (int i = 1 ; i < headers.size() ; i++) {
|
|
217 |
|
|
218 |
for (int i = 1; i < headers.size(); i++) {
|
214 |
219 |
if (headers.get(i).length() == 0) {
|
215 |
220 |
headers.set(i, "noname");
|
216 |
|
System.out.println("Warning: the "+(i+1)+"the column name is empty");
|
|
221 |
System.out.println("Warning: the " + (i + 1) + "the column name is empty");
|
217 |
222 |
}
|
218 |
|
//if(!headers[i].equals("id"))// the first
|
219 |
|
//{
|
|
223 |
// if(!headers[i].equals("id"))// the first
|
|
224 |
// {
|
220 |
225 |
writer.writeStartElement("metadata");
|
221 |
226 |
writer.writeAttribute("id", AsciiUtils.buildId(headers.get(i)));
|
222 |
227 |
writer.writeAttribute("shortname", headers.get(i));
|
... | ... | |
226 |
231 |
writer.writeAttribute("selection", "true");
|
227 |
232 |
writer.writeAttribute("partition", "true");
|
228 |
233 |
writer.writeAttribute("display", "true");
|
229 |
|
|
|
234 |
|
230 |
235 |
writer.writeEndElement();
|
231 |
236 |
writer.writeCharacters("\n");
|
232 |
|
//}
|
|
237 |
// }
|
233 |
238 |
}
|
234 |
|
writer.writeEndElement(); //close metadatas
|
|
239 |
writer.writeEndElement(); // close metadatas
|
235 |
240 |
writer.writeCharacters("\n");
|
236 |
|
|
|
241 |
|
237 |
242 |
writer.writeStartElement("texts");
|
238 |
243 |
writer.writeCharacters("\n");
|
239 |
|
for (int i = 1 ; i < data.size() ; i++) { // the next lines
|
|
244 |
for (int i = 1; i < data.size(); i++) { // the next lines
|
240 |
245 |
ArrayList<String> dataline = data.get(i);
|
241 |
|
|
|
246 |
|
242 |
247 |
writer.writeStartElement("text");
|
243 |
|
|
244 |
|
// write the id attribute
|
245 |
|
for (int j = 0 ; j < headers.size() ; j++) {
|
|
248 |
|
|
249 |
// write the id attribute
|
|
250 |
for (int j = 0; j < headers.size(); j++) {
|
246 |
251 |
if (headers.get(j).equals("id")) {
|
247 |
252 |
writer.writeAttribute("id", dataline.get(j));
|
248 |
253 |
}
|
249 |
254 |
}
|
250 |
255 |
// write the other attributes
|
251 |
|
for (int j = 0 ; j < headers.size() ; j++) {
|
|
256 |
for (int j = 0; j < headers.size(); j++) {
|
252 |
257 |
if (headers.get(j).equals("id")) {
|
253 |
258 |
continue;
|
254 |
259 |
}
|
255 |
260 |
writer.writeStartElement("entry");
|
256 |
261 |
writer.writeAttribute("id", headers.get(j));
|
257 |
262 |
if (dataline.size() <= j) {
|
258 |
|
System.out.println("Warning: malformed data line="+dataline+" at="+j+" for header="+headers);
|
|
263 |
System.out.println("Warning: malformed data line=" + dataline + " at=" + j + " for header=" + headers);
|
259 |
264 |
writer.writeAttribute("value", "");
|
260 |
|
} else {
|
|
265 |
}
|
|
266 |
else {
|
261 |
267 |
writer.writeAttribute("value", dataline.get(j));
|
262 |
268 |
}
|
263 |
269 |
writer.writeEndElement();
|
... | ... | |
265 |
271 |
writer.writeEndElement();
|
266 |
272 |
writer.writeCharacters("\n");
|
267 |
273 |
}
|
268 |
|
writer.writeEndElement();//close texts
|
|
274 |
writer.writeEndElement();// close texts
|
269 |
275 |
writer.writeCharacters("\n");
|
270 |
|
writer.writeEndElement();//close metadatas
|
271 |
|
|
|
276 |
writer.writeEndElement();// close metadatas
|
|
277 |
|
272 |
278 |
writer.close();
|
273 |
279 |
output.close();
|
274 |
280 |
writer.close();
|
275 |
281 |
return true;
|
276 |
282 |
}
|
277 |
|
|
|
283 |
|
278 |
284 |
private boolean convertODSToXml(File inputFile, File xmlFile) throws Exception {
|
|
285 |
|
279 |
286 |
ArrayList<ArrayList<String>> data = ReadODS.toTable(inputFile, "metadata");
|
280 |
|
|
|
287 |
|
281 |
288 |
return data.size() > 0 && convertTableToXml(data, xmlFile);
|
282 |
289 |
}
|
283 |
|
|
|
290 |
|
284 |
291 |
public File getXMLFile() {
|
|
292 |
|
285 |
293 |
return xmlfile;
|
286 |
294 |
}
|
287 |
|
|
|
295 |
|
288 |
296 |
/** The sattr. */
|
289 |
297 |
String sattr = "";
|
290 |
|
|
|
298 |
|
291 |
299 |
/**
|
292 |
300 |
* Initialize.
|
293 |
301 |
*
|
294 |
302 |
* @return true, if successful
|
295 |
303 |
*/
|
296 |
|
private boolean initialize()
|
297 |
|
{
|
|
304 |
private boolean initialize() {
|
|
305 |
|
298 |
306 |
Document doc;
|
299 |
307 |
try {
|
300 |
308 |
doc = DomUtils.load(xmlfile);
|
301 |
|
} catch (Exception e1) {
|
|
309 |
}
|
|
310 |
catch (Exception e1) {
|
302 |
311 |
org.txm.utils.logger.Log.printStackTrace(e1);
|
303 |
312 |
Log.severe(e1.toString());
|
304 |
313 |
return false;
|
... | ... | |
306 |
315 |
NodeList metadatasNodes = doc.getElementsByTagName("metadatas");
|
307 |
316 |
NodeList metadataNodes = null;
|
308 |
317 |
// get metadata nodes
|
309 |
|
for (int i = 0 ; i < metadatasNodes.getLength() ; ) {
|
310 |
|
Element metadatasNode = (Element)metadatasNodes.item(0);
|
|
318 |
for (int i = 0; i < metadatasNodes.getLength();) {
|
|
319 |
Element metadatasNode = (Element) metadatasNodes.item(0);
|
311 |
320 |
metadataNodes = metadatasNode.getElementsByTagName("metadata");
|
312 |
321 |
break;
|
313 |
322 |
}
|
314 |
|
for (int i = 0 ; i < metadataNodes.getLength() ; i++) {
|
315 |
|
Element metadataNode = (Element)metadataNodes.item(i);
|
|
323 |
for (int i = 0; i < metadataNodes.getLength(); i++) {
|
|
324 |
Element metadataNode = (Element) metadataNodes.item(i);
|
316 |
325 |
Metadata m = new Metadata(metadataNode);
|
317 |
326 |
if (m.id == null) {
|
318 |
327 |
System.out.println("The metadata file is missing attribute 'id'");
|
319 |
328 |
return false;
|
320 |
329 |
}
|
321 |
330 |
this.metadatas.add(m);
|
322 |
|
sattr +="+"+m.id;
|
|
331 |
sattr += "+" + m.id;
|
323 |
332 |
headersList.add(m.id);
|
324 |
333 |
}
|
325 |
|
|
|
334 |
|
326 |
335 |
// List<Node> Lmetadatas = doc.texts.text;
|
327 |
336 |
// get metadata nodes
|
328 |
337 |
NodeList textsNodes = doc.getElementsByTagName("texts");
|
329 |
338 |
NodeList textNodes = null;
|
330 |
|
for(int i = 0 ; i < textsNodes.getLength() ;) { // get text nodes
|
331 |
|
Element textsNode = (Element)textsNodes.item(0);
|
|
339 |
for (int i = 0; i < textsNodes.getLength();) { // get text nodes
|
|
340 |
Element textsNode = (Element) textsNodes.item(0);
|
332 |
341 |
textNodes = textsNode.getElementsByTagName("text");
|
333 |
342 |
break;
|
334 |
343 |
}
|
335 |
|
for(int i = 0 ; i < textNodes.getLength() ; i++) {
|
336 |
|
Element e = (Element)textNodes.item(i);
|
|
344 |
for (int i = 0; i < textNodes.getLength(); i++) {
|
|
345 |
Element e = (Element) textNodes.item(i);
|
337 |
346 |
TextInjection inj = new TextInjection(e);
|
338 |
347 |
this.put(inj.id, inj);
|
339 |
348 |
}
|
340 |
|
|
|
349 |
|
341 |
350 |
isInitialize = true;
|
342 |
351 |
return true;
|
343 |
352 |
}
|
344 |
|
|
345 |
|
public HashMap<String, String> getTextMetadata(File f)
|
346 |
|
{
|
|
353 |
|
|
354 |
public HashMap<String, String> getTextMetadata(File f) {
|
|
355 |
|
347 |
356 |
HashMap<String, String> data = new HashMap<String, String>();
|
348 |
357 |
String txtname = f.getName();
|
349 |
358 |
int idx = txtname.lastIndexOf(".");
|
350 |
|
if(idx > 0) txtname = txtname.substring(0, idx);
|
351 |
|
|
|
359 |
if (idx > 0) txtname = txtname.substring(0, idx);
|
|
360 |
|
352 |
361 |
TextInjection injection = this.get(txtname);
|
353 |
362 |
if (injection == null) {
|
354 |
|
System.out.println("Could not find injection for text "+txtname);
|
|
363 |
System.out.println("Could not find injection for text " + txtname);
|
355 |
364 |
return data;
|
356 |
365 |
}
|
357 |
366 |
for (org.txm.metadatas.Entry e : injection) {
|
358 |
367 |
data.put(e.getId(), e.getValue());
|
359 |
368 |
}
|
360 |
|
|
|
369 |
|
361 |
370 |
return data;
|
362 |
371 |
}
|
363 |
|
|
|
372 |
|
364 |
373 |
/**
|
365 |
374 |
* Convert csv to xml.
|
366 |
375 |
*
|
... | ... | |
370 |
379 |
* @param separator the separator
|
371 |
380 |
* @param nbheaderline the nbheaderline
|
372 |
381 |
* @return true, if successful
|
373 |
|
* @throws Exception
|
|
382 |
* @throws Exception
|
374 |
383 |
*/
|
375 |
|
public static boolean convertCsvToXml(File csvfile, File xmlFile, String encoding, String separator, String txtseparator, int nbheaderline) throws Exception
|
376 |
|
{
|
|
384 |
public static boolean convertCsvToXml(File csvfile, File xmlFile, String encoding, String separator, String txtseparator, int nbheaderline) throws Exception {
|
|
385 |
|
377 |
386 |
if (separator == null || separator.length() == 0) {
|
378 |
387 |
separator = "\t";
|
379 |
388 |
}
|
... | ... | |
381 |
390 |
encoding = "UTF-8";
|
382 |
391 |
}
|
383 |
392 |
xmlFile.createNewFile();
|
384 |
|
|
385 |
|
if(!csvfile.exists())
|
386 |
|
{
|
|
393 |
|
|
394 |
if (!csvfile.exists()) {
|
387 |
395 |
System.out.println("Error: CSV file does not exists");
|
388 |
396 |
return false;
|
389 |
397 |
}
|
390 |
|
|
|
398 |
|
391 |
399 |
XMLOutputFactory factory = XMLOutputFactory.newInstance();
|
392 |
400 |
FileOutputStream output = new FileOutputStream(xmlFile);
|
393 |
|
XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8");//create a new file
|
394 |
|
|
|
401 |
XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8");// create a new file
|
|
402 |
|
395 |
403 |
CsvReader reader = new CsvReader(csvfile.getAbsolutePath(), separator.charAt(0), Charset.forName(encoding));
|
396 |
404 |
if (txtseparator != null && txtseparator.length() > 0)
|
397 |
405 |
reader.setTextQualifier(txtseparator.charAt(0));
|
398 |
|
|
|
406 |
|
399 |
407 |
reader.readHeaders();
|
400 |
|
|
|
408 |
|
401 |
409 |
String[] headers = reader.getHeaders();
|
402 |
|
|
403 |
|
if (headers.length == 0)
|
404 |
|
{
|
405 |
|
System.out.println("Error: No header in the metadata file "+csvfile+" with separators: column='"+separator+"' and text='"+txtseparator+"'");
|
|
410 |
|
|
411 |
if (headers.length == 0) {
|
|
412 |
System.out.println("Error: No header in the metadata file " + csvfile + " with separators: column='" + separator + "' and text='" + txtseparator + "'");
|
406 |
413 |
writer.close();
|
407 |
414 |
output.close();
|
408 |
415 |
return false;
|
409 |
416 |
}
|
410 |
|
|
411 |
|
if(!headers[0].equals("id"))
|
412 |
|
{
|
413 |
|
System.out.println("Error: The first column name in the header line of the metadata file '$csvfile' must be 'id' and found '"+headers[0]+"' column separator='\"+separator+\"' and text separator='\"+txtseparator+\"'");
|
|
417 |
|
|
418 |
if (!headers[0].equals("id")) {
|
|
419 |
System.out.println("Error: The first column name in the header line of the metadata file '$csvfile' must be 'id' and found '" + headers[0]
|
|
420 |
+ "' column separator='\"+separator+\"' and text separator='\"+txtseparator+\"'");
|
414 |
421 |
writer.close();
|
415 |
422 |
output.close();
|
416 |
423 |
if (!separator.equals("\t")) {
|
... | ... | |
418 |
425 |
return convertCsvToXml(csvfile, xmlFile, encoding, "\t", "", nbheaderline);
|
419 |
426 |
}
|
420 |
427 |
}
|
421 |
|
|
422 |
|
//check for double columns
|
|
428 |
|
|
429 |
// check for double columns
|
423 |
430 |
HashSet<String> testhash = new HashSet<String>();
|
424 |
431 |
HashSet<String> doubles = new HashSet<String>();
|
425 |
|
for (String str : headers)
|
426 |
|
{
|
|
432 |
for (String str : headers) {
|
427 |
433 |
if (testhash.contains(str))
|
428 |
434 |
doubles.add(str);
|
429 |
435 |
testhash.add(str);
|
430 |
436 |
}
|
431 |
|
if (doubles.size() > 0)
|
432 |
|
{
|
433 |
|
System.out.println("Error: the metadata file '$csvfile' contains duplicated column names: "+doubles);
|
|
437 |
if (doubles.size() > 0) {
|
|
438 |
System.out.println("Error: the metadata file '$csvfile' contains duplicated column names: " + doubles);
|
434 |
439 |
return false;
|
435 |
440 |
}
|
436 |
|
|
|
441 |
|
437 |
442 |
String[] longnames = new String[headers.length];
|
438 |
443 |
String[] types = new String[headers.length];
|
439 |
|
if (nbheaderline > 1)//get longnames
|
440 |
|
{
|
|
444 |
if (nbheaderline > 1) {// get longnames
|
441 |
445 |
reader.readRecord();
|
442 |
|
for (int i = 0 ; i < headers.length ; i++) {
|
|
446 |
for (int i = 0; i < headers.length; i++) {
|
443 |
447 |
longnames[i] = reader.get(headers[i]);
|
444 |
448 |
}
|
445 |
449 |
}
|
446 |
|
else
|
447 |
|
for (int i = 0 ; i < headers.length ; i++) {
|
|
450 |
else {
|
|
451 |
for (int i = 0; i < headers.length; i++) {
|
448 |
452 |
longnames[i] = headers[i];
|
449 |
453 |
}
|
450 |
|
|
451 |
|
if (nbheaderline > 2)//got types
|
452 |
|
{
|
|
454 |
}
|
|
455 |
|
|
456 |
if (nbheaderline > 2) {// got types
|
453 |
457 |
reader.readRecord();
|
454 |
|
for (int i = 0 ; i < headers.length ; i++)
|
455 |
|
{
|
|
458 |
for (int i = 0; i < headers.length; i++) {
|
456 |
459 |
types[i] = reader.get(headers[i]);
|
457 |
460 |
}
|
458 |
461 |
}
|
459 |
|
else
|
460 |
|
{
|
461 |
|
for (int i = 0 ; i < headers.length ; i++) {
|
|
462 |
else {
|
|
463 |
for (int i = 0; i < headers.length; i++) {
|
462 |
464 |
types[i] = "String";
|
463 |
465 |
}
|
464 |
466 |
}
|
465 |
|
|
|
467 |
|
466 |
468 |
writer.writeStartDocument("UTF-8", "1.0");
|
467 |
469 |
writer.writeStartElement("enrichissement");
|
468 |
470 |
writer.writeStartElement("metadatas");
|
469 |
471 |
writer.writeCharacters("\n");
|
470 |
|
//println "headers : "+Arrays.toString(headers)
|
471 |
|
for (int i = 1 ; i < headers.length ; i++) {
|
|
472 |
// println "headers : "+Arrays.toString(headers)
|
|
473 |
for (int i = 1; i < headers.length; i++) {
|
472 |
474 |
if (headers[i].length() == 0) {
|
473 |
475 |
headers[i] = "noname";
|
474 |
|
System.out.println("Warning: the "+(i+1)+"th column name is empty");
|
|
476 |
System.out.println("Warning: the " + (i + 1) + "th column name is empty");
|
475 |
477 |
}
|
476 |
|
//if(!headers[i].equals("id"))// the first
|
477 |
|
//{
|
|
478 |
// if(!headers[i].equals("id"))// the first
|
|
479 |
// {
|
478 |
480 |
writer.writeStartElement("metadata");
|
479 |
481 |
writer.writeAttribute("id", AsciiUtils.buildId(headers[i]));
|
480 |
482 |
writer.writeAttribute("shortname", headers[i]);
|
... | ... | |
484 |
486 |
writer.writeAttribute("selection", "true");
|
485 |
487 |
writer.writeAttribute("partition", "true");
|
486 |
488 |
writer.writeAttribute("display", "true");
|
487 |
|
|
|
489 |
|
488 |
490 |
writer.writeEndElement();
|
489 |
491 |
writer.writeCharacters("\n");
|
490 |
|
//}
|
|
492 |
// }
|
491 |
493 |
}
|
492 |
|
writer.writeEndElement();//close metadatas
|
|
494 |
writer.writeEndElement();// close metadatas
|
493 |
495 |
writer.writeCharacters("\n");
|
494 |
496 |
|
495 |
497 |
writer.writeStartElement("texts");
|
496 |
498 |
writer.writeCharacters("\n");
|
497 |
|
while (reader.readRecord()) {
|
|
499 |
while (reader.readRecord()) {
|
498 |
500 |
writer.writeStartElement("text");
|
499 |
|
for(int i = 0 ; i < headers.length ; i++)
|
500 |
|
if(headers[i].equals("id")) {
|
|
501 |
for (int i = 0; i < headers.length; i++)
|
|
502 |
if (headers[i].equals("id")) {
|
501 |
503 |
writer.writeAttribute("id", reader.get(headers[i]));
|
502 |
|
} else if(headers[i].equals("xpath")) {
|
|
504 |
}
|
|
505 |
else if (headers[i].equals("xpath")) {
|
503 |
506 |
writer.writeAttribute("xpath", reader.get(headers[i]));
|
504 |
507 |
}
|
505 |
|
|
506 |
|
for(int i = 0 ; i < headers.length ; i++)
|
507 |
|
if(!headers[i].equals("id") && !headers[i].equals("xpath"))
|
508 |
|
{
|
|
508 |
|
|
509 |
for (int i = 0; i < headers.length; i++)
|
|
510 |
if (!headers[i].equals("id") && !headers[i].equals("xpath")) {
|
509 |
511 |
writer.writeEmptyElement("entry");
|
510 |
512 |
writer.writeAttribute("id", AsciiUtils.buildId(headers[i]));
|
511 |
513 |
String value = reader.get(headers[i]);
|
512 |
|
if(value.length() == 0)
|
|
514 |
if (value.length() == 0)
|
513 |
515 |
writer.writeAttribute("value", "N/A");
|
514 |
516 |
else
|
515 |
517 |
writer.writeAttribute("value", value);
|
516 |
|
|
|
518 |
|
517 |
519 |
}
|
518 |
520 |
writer.writeEndElement();
|
519 |
521 |
writer.writeCharacters("\n");
|
520 |
522 |
}
|
521 |
|
writer.writeEndElement();//close texts
|
|
523 |
writer.writeEndElement();// close texts
|
522 |
524 |
writer.writeCharacters("\n");
|
523 |
|
writer.writeEndElement();//close metadatas
|
524 |
|
|
|
525 |
writer.writeEndElement();// close metadatas
|
|
526 |
|
525 |
527 |
reader.close();
|
526 |
528 |
writer.close();
|
527 |
529 |
output.close();
|
528 |
|
|
|
530 |
|
529 |
531 |
output = null;
|
530 |
532 |
writer = null;
|
531 |
533 |
return true;
|
532 |
534 |
}
|
533 |
|
|
|
535 |
|
534 |
536 |
/**
|
535 |
537 |
* Keep only metadatas.
|
536 |
538 |
*
|
537 |
539 |
* @param ids the ids
|
538 |
540 |
*/
|
539 |
|
public void keepOnlyMetadatas(String[] ids)
|
540 |
|
{
|
541 |
|
//println metadatas
|
|
541 |
public void keepOnlyMetadatas(String[] ids) {
|
|
542 |
|
|
543 |
// println metadatas
|
542 |
544 |
List<String> Lids = Arrays.asList(ids);
|
543 |
545 |
headersList.removeAll(Lids);
|
544 |
|
for(int i = 0 ; i < metadatas.size() ; i++)
|
545 |
|
{
|
|
546 |
for (int i = 0; i < metadatas.size(); i++) {
|
546 |
547 |
Metadata m = metadatas.get(i);
|
547 |
|
if(!Lids.contains(m.id))
|
548 |
|
{
|
|
548 |
if (!Lids.contains(m.id)) {
|
549 |
549 |
metadatas.remove(i);
|
550 |
550 |
i--;
|
551 |
551 |
}
|
552 |
552 |
}
|
553 |
|
|
554 |
|
for(TextInjection inj : this.values())
|
555 |
|
{
|
556 |
|
for(int i = 0 ; i < inj.size() ; i++)
|
557 |
|
{
|
|
553 |
|
|
554 |
for (TextInjection inj : this.values()) {
|
|
555 |
for (int i = 0; i < inj.size(); i++) {
|
558 |
556 |
org.txm.metadatas.Entry e = inj.get(i);
|
559 |
|
if(!Lids.contains(e.getId()))
|
560 |
|
{
|
|
557 |
if (!Lids.contains(e.getId())) {
|
561 |
558 |
inj.remove(i);
|
562 |
559 |
i--;
|
563 |
560 |
}
|
564 |
561 |
}
|
565 |
562 |
}
|
566 |
563 |
}
|
567 |
|
|
|
564 |
|
568 |
565 |
/**
|
569 |
566 |
* Inject metadatas in xml txm.
|
570 |
567 |
*
|
571 |
568 |
* @param infile the infile
|
572 |
569 |
* @param outfile the outfile
|
573 |
570 |
* @return true, if successful
|
574 |
|
* @throws XMLStreamException
|
575 |
|
* @throws IOException
|
576 |
|
* @throws MalformedURLException
|
|
571 |
* @throws XMLStreamException
|
|
572 |
* @throws IOException
|
|
573 |
* @throws MalformedURLException
|
577 |
574 |
*/
|
578 |
|
public boolean injectMetadatasInXmlTXM(File infile, File outfile) throws MalformedURLException, IOException, XMLStreamException
|
579 |
|
{
|
580 |
|
return injectMetadatasInXml(infile, outfile,"text", "tei");
|
|
575 |
public boolean injectMetadatasInXmlTXM(File infile, File outfile) throws MalformedURLException, IOException, XMLStreamException {
|
|
576 |
|
|
577 |
return injectMetadatasInXml(infile, outfile, "text", "tei");
|
581 |
578 |
}
|
582 |
|
|
|
579 |
|
583 |
580 |
/**
|
584 |
581 |
* Inject metadatas in xml.
|
585 |
582 |
*
|
... | ... | |
587 |
584 |
* @param outfile the outfile
|
588 |
585 |
* @param tag the tag
|
589 |
586 |
* @return true, if successful
|
590 |
|
* @throws XMLStreamException
|
591 |
|
* @throws IOException
|
592 |
|
* @throws MalformedURLException
|
|
587 |
* @throws XMLStreamException
|
|
588 |
* @throws IOException
|
|
589 |
* @throws MalformedURLException
|
593 |
590 |
*/
|
594 |
|
public boolean injectMetadatasInXml(File infile, File outfile, String tag) throws MalformedURLException, IOException, XMLStreamException
|
595 |
|
{
|
596 |
|
return injectMetadatasInXml(infile, outfile,tag, null);
|
|
591 |
public boolean injectMetadatasInXml(File infile, File outfile, String tag) throws MalformedURLException, IOException, XMLStreamException {
|
|
592 |
|
|
593 |
return injectMetadatasInXml(infile, outfile, tag, null);
|
597 |
594 |
}
|
598 |
|
|
|
595 |
|
599 |
596 |
/**
|
600 |
597 |
* Inject metadatas in xml.
|
601 |
598 |
*
|
... | ... | |
604 |
601 |
* @param tag the tag
|
605 |
602 |
* @param namespace the namespace
|
606 |
603 |
* @return true, if successful
|
607 |
|
* @throws XMLStreamException
|
608 |
|
* @throws IOException
|
609 |
|
* @throws MalformedURLException
|
|
604 |
* @throws XMLStreamException
|
|
605 |
* @throws IOException
|
|
606 |
* @throws MalformedURLException
|
610 |
607 |
*/
|
611 |
|
public boolean injectMetadatasInXml(File infile, File outfile, String tag, String namespace) throws MalformedURLException, IOException, XMLStreamException
|
612 |
|
{
|
|
608 |
public boolean injectMetadatasInXml(File infile, File outfile, String tag, String namespace) throws MalformedURLException, IOException, XMLStreamException {
|
|
609 |
|
613 |
610 |
String key = infile.getName();
|
614 |
|
if(key.lastIndexOf(".") > 0)
|
|
611 |
if (key.lastIndexOf(".") > 0)
|
615 |
612 |
key = key.substring(0, key.lastIndexOf("."));
|
616 |
|
|
|
613 |
|
617 |
614 |
ArrayList<org.txm.metadatas.Entry> metas = get(key);
|
618 |
|
|
|
615 |
|
619 |
616 |
if (metas == null) {
|
620 |
|
System.out.println("\nError: can't find metadata for text of id="+key);
|
|
617 |
System.out.println("\nError: can't find metadata for text of id=" + key);
|
621 |
618 |
System.out.println("Maybe the metadata file doesn't have the right format (comma separated values are needed)");
|
622 |
619 |
return false;
|
623 |
620 |
}
|
624 |
|
|
|
621 |
|
625 |
622 |
AddAttributeInXml builder = new AddAttributeInXml(infile, tag, metas);
|
626 |
623 |
builder.onlyOneElement();
|
627 |
624 |
return builder.process(outfile);
|
628 |
625 |
}
|
629 |
|
|
|
626 |
|
630 |
627 |
/**
|
631 |
628 |
* Save.
|
632 |
629 |
*
|
... | ... | |
634 |
631 |
* @param outfile the outfile
|
635 |
632 |
* @return true, if successful
|
636 |
633 |
*/
|
637 |
|
private boolean save(org.w3c.dom.Node doc, File outfile)
|
638 |
|
{
|
|
634 |
private boolean save(org.w3c.dom.Node doc, File outfile) {
|
|
635 |
|
639 |
636 |
try {
|
640 |
637 |
// Création de la source DOM
|
641 |
638 |
Source source = new DOMSource(doc);
|
642 |
|
|
|
639 |
|
643 |
640 |
// Création du fichier de sortie
|
644 |
|
Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
|
|
641 |
Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
|
645 |
642 |
Result resultat = new StreamResult(writer);
|
646 |
|
|
|
643 |
|
647 |
644 |
// Configuration du transformer
|
648 |
645 |
TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl();
|
649 |
646 |
Transformer transformer = fabrique.newTransformer();
|
650 |
647 |
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
|
651 |
648 |
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
|
652 |
649 |
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
|
653 |
|
|
|
650 |
|
654 |
651 |
// Transformation
|
655 |
652 |
transformer.transform(source, resultat);
|
656 |
653 |
writer.close();
|
657 |
654 |
doc = null;
|
658 |
655 |
return true;
|
659 |
|
} catch (Exception e) {
|
|
656 |
}
|
|
657 |
catch (Exception e) {
|
660 |
658 |
org.txm.utils.logger.Log.printStackTrace(e);
|
661 |
659 |
return false;
|
662 |
660 |
}
|
663 |
661 |
}
|
664 |
|
|
|
662 |
|
665 |
663 |
/**
|
666 |
664 |
* Insert.
|
667 |
665 |
*
|
... | ... | |
669 |
667 |
* @param xpath the xpath
|
670 |
668 |
* @param pairs the pairs
|
671 |
669 |
* @return true, if successful
|
672 |
|
* @throws XPathExpressionException
|
|
670 |
* @throws XPathExpressionException
|
673 |
671 |
*/
|
674 |
|
public boolean insert(Node doc, String xpath, List<Pair<String, String>> pairs) throws XPathExpressionException
|
675 |
|
{
|
676 |
|
//println ("insert $pairs into $xpath")
|
|
672 |
public boolean insert(Node doc, String xpath, List<Pair<String, String>> pairs) throws XPathExpressionException {
|
|
673 |
|
|
674 |
// println ("insert $pairs into $xpath")
|
677 |
675 |
XPathFactory factory = XPathFactory.newInstance();
|
678 |
676 |
XPath XpathObj = factory.newXPath();
|
679 |
|
|
|
677 |
|
680 |
678 |
XpathObj.setNamespaceContext(nsContext);
|
681 |
679 |
XPathExpression expr = XpathObj.compile(xpath);
|
682 |
|
|
|
680 |
|
683 |
681 |
NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
|
684 |
682 |
int count = 0;
|
685 |
|
for(int i = 0 ; i < nodes.getLength() ; i++) {
|
|
683 |
for (int i = 0; i < nodes.getLength(); i++) {
|
686 |
684 |
org.w3c.dom.Node node = nodes.item(i);
|
687 |
|
Element elem = (Element)node;
|
688 |
|
for(Pair<String, String> p : pairs) {
|
689 |
|
//println "add attr "+p.getFirst()+"="+ p.getSecond()
|
|
685 |
Element elem = (Element) node;
|
|
686 |
for (Pair<String, String> p : pairs) {
|
|
687 |
// println "add attr "+p.getFirst()+"="+ p.getSecond()
|
690 |
688 |
elem.setAttribute(p.getFirst(), p.getSecond());
|
691 |
689 |
}
|
692 |
690 |
count++;
|
693 |
691 |
}
|
694 |
|
|
|
692 |
|
695 |
693 |
factory = null;
|
696 |
694 |
XpathObj = null;
|
697 |
695 |
expr = null;
|
698 |
|
if(count > 0)
|
|
696 |
if (count > 0)
|
699 |
697 |
return true;
|
700 |
|
else
|
701 |
|
{
|
702 |
|
System.out.println("Error: no tag has been found with the xpath: "+xpath);
|
|
698 |
else {
|
|
699 |
System.out.println("Error: no tag has been found with the xpath: " + xpath);
|
703 |
700 |
return false;
|
704 |
701 |
}
|
705 |
702 |
}
|
706 |
|
|
|
703 |
|
707 |
704 |
/**
|
708 |
705 |
* Gets the property names.
|
709 |
706 |
*
|
710 |
707 |
* @return the property names
|
711 |
708 |
*/
|
712 |
|
public List<String> getPropertyNames()
|
713 |
|
{
|
|
709 |
public List<String> getPropertyNames() {
|
|
710 |
|
714 |
711 |
return headersList;
|
715 |
712 |
}
|
716 |
|
|
|
713 |
|
717 |
714 |
/**
|
718 |
715 |
* Gets the sattributes.
|
719 |
716 |
*
|
720 |
717 |
* @return the sattributes
|
721 |
718 |
*/
|
722 |
|
public String getSattributes()
|
723 |
|
{
|
|
719 |
public String getSattributes() {
|
|
720 |
|
724 |
721 |
String sattr = "";
|
725 |
|
for(String attr : headersList)
|
726 |
|
sattr += "+"+attr;
|
|
722 |
for (String attr : headersList)
|
|
723 |
sattr += "+" + attr;
|
727 |
724 |
return sattr;
|
728 |
725 |
}
|
729 |
|
|
730 |
|
/* (non-Javadoc)
|
|
726 |
|
|
727 |
/*
|
|
728 |
* (non-Javadoc)
|
731 |
729 |
* @see java.util.AbstractMap#toString()
|
732 |
730 |
*/
|
733 |
731 |
@Override
|
734 |
732 |
public String toString() {
|
|
733 |
|
735 |
734 |
StringBuffer str = new StringBuffer();
|
736 |
735 |
str.append("Metadata: \n");
|
737 |
736 |
for (Metadata data : metadatas) {
|
738 |
|
str.append(" "+data.toString()+"\n");
|
|
737 |
str.append(" " + data.toString() + "\n");
|
739 |
738 |
}
|
740 |
|
|
|
739 |
|
741 |
740 |
str.append("Injections: \n");
|
742 |
|
for(TextInjection injection : this.values()) {
|
743 |
|
str.append(" "+injection.toString()+"\n");
|
|
741 |
for (TextInjection injection : this.values()) {
|
|
742 |
str.append(" " + injection.toString() + "\n");
|
744 |
743 |
}
|
745 |
744 |
return str.toString();
|
746 |
745 |
}
|
747 |
|
|
|
746 |
|
748 |
747 |
/**
|
749 |
748 |
* The main method.
|
750 |
749 |
*
|
751 |
750 |
* @param args the arguments
|
752 |
751 |
*/
|
753 |
752 |
public static void main(String[] args) {
|
|
753 |
|
754 |
754 |
String userhome = System.getProperty("user.home");
|
755 |
755 |
File xmlfile = new File(userhome, "/xml/metadata.xml");
|
756 |
756 |
File csvfile = new File(userhome, "/xml/metadata.csv");
|
757 |
757 |
File odsfile = new File(userhome, "/xml/metadata.ods");
|
758 |
758 |
File xlsxfile = new File(userhome, "/xml/metadata.xlsx");
|
759 |
|
|
760 |
|
Metadatas m = new Metadatas(odsfile, "UTF-8", ",","\"", 1);
|
|
759 |
|
|
760 |
Metadatas m = new Metadatas(odsfile, "UTF-8", ",", "\"", 1);
|
761 |
761 |
if (m.isInitialized()) {
|
762 |
762 |
System.out.println(m.toString());
|
763 |
763 |
}
|
764 |
764 |
}
|
765 |
|
|
|
765 |
|
766 |
766 |
public ArrayList<Metadata> getMetadatas() {
|
|
767 |
|
767 |
768 |
return metadatas;
|
768 |
|
}
|
769 |
|
|
770 |
|
public ArrayList<String> getHeadersList(){
|
|
769 |
}
|
|
770 |
|
|
771 |
public ArrayList<String> getHeadersList() {
|
|
772 |
|
771 |
773 |
return this.headersList;
|
772 |
774 |
}
|
773 |
775 |
}
|