Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / corpus / Metadata2TeiHeaderMacro.groovy @ 1969

History | View | Annotate | Download (3.2 kB)

1
// STANDARD DECLARATIONS
2
package org.txm.macro.corpus
3

    
4
import java.nio.charset.Charset
5

    
6
import org.kohsuke.args4j.*
7
import groovy.transform.Field
8

    
9
import org.txm.importer.StaxIdentityParser
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.searchengine.cqp.corpus.*
12
import org.txm.utils.CsvReader
13
import org.txm.utils.io.IOUtils
14

    
15
// The macro needs a CQP corpus as the current selection; stop otherwise.
if (!(corpusViewSelection instanceof CQPCorpus)) {
        println "** Corpora selection is not a Corpus. Aborting."
        return;
}

// MACRO PARAMETERS
// Each @Field/@Option pair must annotate a real declaration ("def name"),
// otherwise the annotation has nothing to attach to.

@Field @Option(name="xpathFile", usage="properties file to redirect metadata column to the teiHeader locations", widget="FileOpen", required=true, def="file.xml")
def xpathFile

@Field @Option(name="metadataFile", usage="the TSV file containing the metadata values per text", widget="FileOpen", required=true, def="file.xml")
def metadataFile

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
def debug

// Open the parameters dialog; stop if it reports failure/cancellation.
if (!ParametersDialog.open(this)) return;

// Turn the selected debug label into a numeric level.
// Any other value is left untouched, as before.
switch (debug) {
        case "OFF":
                debug = 0
                break
        case "ON":
                debug = 1
                break
        case "ALL":
                debug = 2
                break
        case "REALLY ALL":
                debug = 3
                break
}
31

    
32

    
33
def corpus = corpusViewSelection
def project = corpus.getProject()

// Directory "txm/<corpus id>" under the project directory; the per-text
// "<id>.xml" files are looked up in it further down.
def txmDir = new File(project.getProjectDirectory(), "txm/"+corpus.getID())

if (!txmDir.exists()) {
        println "** the selected corpus has no XML-TXM files. Aborting."
        // Actually abort: without this the script would go on and report
        // every text of the metadata file as "not found".
        return;
}
41

    
42
// Load the properties file selected as "xpathFile".
// Properties.load() leaves the reader open, so close it explicitly.
def xpathProperties = new Properties()
def xpathReader = IOUtils.getReader(xpathFile)
try {
        xpathProperties.load(xpathReader)
} finally {
        xpathReader.close()
}
println xpathProperties

// Tab-separated, UTF-8 metadata table: one record per text.
def csvReader = new CsvReader(metadataFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8"))
println csvReader

try {
        csvReader.readHeaders();
        def header = csvReader.getHeaders()
        // The "id" column is what links a record to its "<id>.xml" file.
        if (!header.contains("id")) {
                println "** 'id' column not found in $metadataFile"
                return;
        }
        while (csvReader.readRecord())        {
                String text_id = csvReader.get("id")
                if (text_id == null || text_id.length() == 0) {
                        println "** @id not found for record="+csvReader.getRawRecord()
                        continue;
                }
                File txmFile = new File(txmDir, text_id+".xml")
                if (txmFile.exists()) {

                        // Snapshot the current record as a column name -> value map.
                        def data = [:]
                        for (def h : header) data[h] = csvReader.get(h)

                        injecting(txmFile, data, xpathProperties)
                } else {
                        println "** Text not found: $txmFile"
                }
        }
} finally {
        // Release the metadata file on every exit path, including the
        // early return above.
        // NOTE(review): assumes CsvReader exposes close() - confirm.
        csvReader.close()
}
72

    
73
/**
 * Runs a StaxIdentityParser over one text file, printing the element path
 * (and attributes, when present) of every element found from the
 * "teiHeader" start tag until its end tag.
 *
 * The parser output goes to a temporary file. Replacing txmFile with that
 * output is not implemented yet, so the temporary file is deleted before
 * returning and txmFile is never written to by this method.
 *
 * @param txmFile the XML file to parse
 * @param data column name -> value map of the metadata record (not used yet)
 * @param paths the loaded properties (not used yet)
 * @return the value returned by sparser.process(outfile)
 */
def injecting(File txmFile, def data, def paths) {
        println "Editing: $txmFile..."
        StaxIdentityParser sparser = new StaxIdentityParser(txmFile) {
                // "/"-joined local names of the currently open elements, starting at teiHeader
                String currentXPath = ""
                // true while the parser is inside the teiHeader element
                boolean start = false;
                // not used yet
                def foundPaths = [:]

                @Override
                public void processStartElement() {
                        super.processStartElement();

                        if (localname == "teiHeader") start = true;
                        if (start) {
                                currentXPath += "/"+localname
                                def attributes = [:]
                                for (int i = 0 ; i < parser.getAttributeCount() ; i++) {
                                        attributes[parser.getAttributeLocalName(i)] = parser.getAttributeValue(i);
                                }
                                if (attributes.size() > 0) {
                                        println currentXPath+attributes
                                } else {
                                        println currentXPath
                                }
                        }
                }

                @Override
                public void processEndElement() {
                        super.processEndElement();
                        if (start) {
                                // Pop the closing element. This also pops teiHeader itself, so the
                                // path is empty again if another teiHeader follows in the same file.
                                currentXPath = currentXPath.substring(0, currentXPath.lastIndexOf("/"))
                        }
                        if (localname == "teiHeader") start = false;
                }
        }
        File outfile = File.createTempFile("txm", txmFile.getName())
        try {
                boolean ret = sparser.process(outfile);
                if (ret) {
                        // TODO: replace txmFile with outfile once the injection is implemented
                }

                return ret;
        } finally {
                // The output is not used for anything yet; do not leave one
                // temporary file behind per processed text.
                outfile.delete()
        }
}