Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / imports / TeiHeader2MetadataCSVMacro.groovy @ 479

History | View | Annotate | Download (2.9 kB)

1
// Copyright © 2017 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author sheiden
4

    
5
package org.txm.macro.imports
6

    
7
import org.kohsuke.args4j.*
8
import groovy.transform.Field
9
import org.txm.rcpapplication.swt.widget.parameters.*
10
import org.txm.importer.XPathResult
11

    
12
// TeiHeader2MetadataCSV macro.
//
// Reads a properties file whose entries are "metadata name = XPath", evaluates
// every XPath against each ".xml" file of the input directory ("import.xml" is
// skipped) and writes one CSV row per file: the first column ("id") is the file
// name without its extension, the following columns are the metadata values, in
// the sorted order of the metadata names.

// ---- Macro parameters (filled in by the parameters dialog) ----

// Directory whose ".xml" files are processed.
@Field @Option(name="inputDirectory", usage="input directory (contains XML files)", widget="Folder", required=true, def="input")
def inputDirectory

// Properties file mapping each metadata name to the XPath that extracts it (read as UTF-8).
@Field @Option(name="propertiesFile", usage="metadata=XPath properties input file", widget="File", required=true, def="import.properties")
File propertiesFile

// CSV file to create (written as UTF-8).
@Field @Option(name="CSVoutputFile", usage="CSV output file", widget="File", required=true, def="metadata.csv")
File CSVoutputFile

// String written between two cells of a row.
@Field @Option(name="columnSeparator", usage="column separator for CSV file", widget="String", required=true, def=",")
def columnSeparator

// String written around each metadata value.
@Field @Option(name="textSeparator", usage="text separator for CSV file", widget="String", required=true, def="\"")
def textSeparator

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// ---- Validate the inputs BEFORE creating the output file ----
// Opening the writer first would truncate the CSV file and leak the writer
// whenever one of the checks below stops the macro.

// Load the "metadata name = XPath" mapping; withReader closes the reader even if load() fails.
metadataXPaths = new Properties()
if (propertiesFile.exists() && propertiesFile.canRead()) {
	propertiesFile.withReader("UTF-8") { input -> metadataXPaths.load(input) }
} else {
	println "** TeiHeader2MetadataCSV: '$propertiesFile' file not found."
	return
}

// listFiles() yields null when the directory cannot be listed.
def listing = inputDirectory.listFiles()
if (listing == null || listing.size() == 0) {
	println "** TeiHeader2MetadataCSV: no files found in '$inputDirectory' directory."
	return
}

// Keep the ".xml" files only, "import.xml" excepted.
def files = listing.findAll { it.getName().endsWith(".xml") && !it.getName().equals("import.xml") }
if (files.size() == 0) {
	println "** TeiHeader2MetadataCSV: no usefull files found in '$inputDirectory' directory."
	return
}

// println "files = "+files

// ---- CSV cell helpers ----

// Same string conversion as the "+" concatenations they replace.
String colSep = "" + columnSeparator
String txtSep = "" + textSeparator

// Wraps a cell in text separators; an embedded text separator is doubled so
// that the cell stays a single field for CSV readers.
def quote = { String cell ->
	String escaped = txtSep.length() > 0 ? cell.replace(txtSep, txtSep + txtSep) : cell
	txtSep + escaped + txtSep
}

// Ids and header names are written unquoted, except when their content would
// otherwise break the row (contains a separator or a line break).
def quoteIfNeeded = { String cell ->
	boolean ambiguous = cell.contains("\n") || cell.contains("\r") ||
			(colSep.length() > 0 && cell.contains(colSep)) ||
			(txtSep.length() > 0 && cell.contains(txtSep))
	(ambiguous && txtSep.length() > 0) ? quote(cell) : cell
}

// ---- Write the CSV file ----

metadataNames = metadataXPaths.keySet().sort()

writer = CSVoutputFile.newWriter("UTF-8")
try {
	// Header row: "id" followed by the metadata names.
	writer << "id" + colSep + metadataNames.collect { quoteIfNeeded("" + it) }.join(colSep) + "\n"

	for (File f : files) {
		String filename = f.getName()
		String id = filename.substring(0, filename.lastIndexOf("."))

		def cells
		def xpathprocessor = new XPathResult(f)
		try {
			cells = metadataNames.collect { name ->
				// "N/A" is passed as the second argument of getXpathResponse, as before.
				def value = xpathprocessor.getXpathResponse(metadataXPaths.get(name), "N/A")
				// Collapse runs of spaces, tabs and newlines into a single space.
				value = value.trim().replaceAll(/[ \t\n]+/, " ")
				// println sprintf("%s: %s = %s", filename, name, value)
				quote(value)
			}
		} finally {
			// Release the XPath processor even when an evaluation fails.
			xpathprocessor.close()
		}

		writer << quoteIfNeeded(id) + colSep + cells.join(colSep) + "\n"
	}
} finally {
	// Always flush and close the output, including on error.
	writer.close()
}
102