Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / democrat / Metadata2TeiHeaderMacro.groovy @ 2027

History | View | Annotate | Download (7 kB)

1
// STANDARD DECLARATIONS
2
package org.txm.macro.urs.democrat
3

    
4
import java.nio.charset.Charset
5

    
6
import org.kohsuke.args4j.*
7
import groovy.transform.Field
8

    
9
import org.txm.importer.StaxIdentityParser
10
import org.txm.objects.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.searchengine.cqp.corpus.*
13
import org.txm.utils.CsvReader
14
import org.txm.utils.io.IOUtils
15
import org.txm.scripts.importer.XPathResult
16
import javax.xml.xpath.XPathConstants
17

    
18
class Metadata2TEiHeaderMacro {
19
        
20
        /**
         * Verbosity level. The methods of this class print extra messages
         * when it exceeds the threshold tested at each call site (1, 2 or 3).
         */
        int debug = 0

        /**
         * Creates the helper with the given verbosity.
         *
         * @param debug the verbosity level stored in the {@code debug} property
         */
        Metadata2TEiHeaderMacro(int debug) {
                this.debug = debug
        }
24
        //@Field @Option(name="teiHeaderTemplateFile", usage="the default teiHeader of texts", widget="FileOpen", required=true, def="teiHeaderTemplateFile.xml")
25
        //def teiHeaderTemplateFile
26
        //
27
        //@Field @Option(name="xpathFile", usage="properties file to redirect metadata column to the teiHeader locations", widget="FileOpen", required=true, def="xpathFile.properties")
28
        //def xpathFile
29
        //
30
        //@Field @Option(name="metadataFile", usage="the TSV file containing the metadata values per text", widget="FileOpen", required=true, def="metadataFile.tsv")
31
        //def metadataFile
32
        //
33
        //@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
34
        //debug
35
        //
36
        //if (!ParametersDialog.open(this)) return;
37
        //if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
38
        //
39
        //
40
        //
41
        //def xpathProperties = new Properties()
42
        //xpathProperties.load(IOUtils.getReader(xpathFile))
43
        //println xpathProperties
44
        //
45
        //def csvReader = new CsvReader(metadataFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8"))
46
        //println csvReader
47
        //
48
        //csvReader.readHeaders();
49
        //def header = csvReader.getHeaders()
50
        //if (!header.contains("id")) {
51
        //        println "** 'id' column not found in $metadataFile header=$header"
52
        //        return;
53
        //}
54
        //if (!header.contains("corpus_id")) {
55
        //        println "** 'corpus_id' column not found in $metadataFile header=$header"
56
        //        return;
57
        //}
58
        //
59
        //def corpora = [:]
60
        //for (Project project : Workspace.getInstance().getProjects()) {
61
        //        for (MainCorpus corpus : project.getChildren(MainCorpus.class)) {
62
        //                corpora[corpus.getID()] = corpus;
63
        //        }
64
        //}
65
        //
66
        //while (csvReader.readRecord())        {
67
        //        String text_id = csvReader.get("id")
68
        //        if (text_id == null || text_id.length() == 0) {
69
        //                //println "** @id not found for record="+csvReader.getRawRecord()
70
        //                continue;
71
        //        }
72
        //
73
        //        String corpus_id = csvReader.get("corpus_id")
74
        //        if (corpus_id == null || corpus_id.length() == 0) {
75
        //                println "** @corpus_id not found for record="+csvReader.getRawRecord()
76
        //                continue;
77
        //        }
78
        //
79
        //        MainCorpus corpus = corpora[corpus_id]
80
        //        if (corpus == null) {
81
        //                println "** no corpus found for ID=$corpus_id"
82
        //                continue;
83
        //        }
84
        //        def project = corpus.getProject()
85
        //
86
        //        def txmDir = new File(project.getProjectDirectory(), "txm/"+corpus.getID())
87
        //
88
        //        if (!txmDir.exists()) {
89
        //                println "** the selected corpus has no XML-TXM files. Aborting."
90
        //        }
91
        //
92
        //
93
        //        File txmFile = new File(txmDir, text_id+".xml")
94
        //        if (txmFile.exists()) {
95
        //
96
        //                def data = [:]
97
        //
98
        //                for (def h : header) data[h] = csvReader.get(h)
99
        //
100
        //                String xmlteiHeaderContent = getCustomizedTEiHeader(teiHeaderTemplateFile, data, xpathProperties);
101
        //                if (xmlteiHeaderContent != null && xmlteiHeaderContent.length() > 0) {
102
        //                        injecting(txmFile, xmlteiHeaderContent)
103
        //                } else {
104
        //                        println "** Text header not updated: $txmFile"
105
        //                }
106
        //        } else {
107
        //                println "** Text not found: $txmFile"
108
        //        }
109
        //}
110

    
111
        /**
         * Builds the teiHeader XML of one text from a template file and the
         * metadata values of that text.
         *
         * Every key of {@code data} that also has an entry in
         * {@code xpathProperties} is looked up with that XPath, evaluated from
         * the document element of the template. The first matching node is
         * patched with the metadata value: an element is replaced by a new
         * element of the same tag name holding only the value as text, an
         * attribute or text node gets the value as its node value. Other node
         * types are left alone. Keys without an XPath are skipped; XPaths that
         * match nothing are reported on the standard output.
         *
         * @param teiHeaderTemplateFile the XML template handed to XPathResult
         * @param data map-like object: metadata name -> value to inject
         * @param xpathProperties metadata name -> XPath of the node to patch
         * @return the string form of the patched teiHeader element without its
         *         leading XML declaration, or null when the template contains
         *         no teiHeader element
         */
        def getCustomizedTEiHeader(File teiHeaderTemplateFile, def data, Properties xpathProperties) {
                XPathResult xpathProcessor = new XPathResult(teiHeaderTemplateFile)

                for (String info : data.keySet()) {
                        String xpath = xpathProperties[info]
                        if (xpath == null) {
                                continue // not a data to inject
                        }

                        // a missing value is injected as an empty string instead of a null DOM value
                        def rawValue = data[info]
                        String value = (rawValue == null) ? "" : rawValue.toString()
                        if (debug > 1) println "   injecting '$info'='$value' in '$xpath'"

                        def expr = xpathProcessor.xpath.compile(xpath)
                        def first = expr.evaluate(xpathProcessor.doc.getDocumentElement(), XPathConstants.NODE)
                        if (first == null) {
                                println "** not found $xpath"
                                continue
                        }

                        switch (first.getNodeType()) {
                                case org.w3c.dom.Node.ELEMENT_NODE:
                                        if (debug > 1) println "   patching: $info with "+value
                                        // NOTE(review): the replacement element only keeps the tag name;
                                        // attributes and children of the template element are dropped.
                                        def newChild = xpathProcessor.doc.createElement(first.getTagName())
                                        newChild.appendChild(xpathProcessor.doc.createTextNode(value))
                                        first.getParentNode().replaceChild(newChild, first)
                                        break
                                case org.w3c.dom.Node.ATTRIBUTE_NODE:
                                        if (debug > 1) println "   patching attribute: $info with "+value
                                        first.setNodeValue(value)
                                        break
                                case org.w3c.dom.Node.TEXT_NODE:
                                        if (debug > 1) println "   patching text: $info with "+value
                                        first.setNodeValue(value)
                                        break
                                default:
                                        break
                        }
                }

                def root = xpathProcessor.doc.getDocumentElement()
                def teiHeader = root.getElementsByTagName("teiHeader").item(0)
                if (teiHeader == null && root.getTagName() == "teiHeader") {
                        // getElementsByTagName only returns descendants: accept a template rooted at teiHeader
                        teiHeader = root
                }
                if (teiHeader == null) {
                        println "** no teiHeader element found in $teiHeaderTemplateFile"
                        return null
                }

                // The string form of the element presumably starts with an XML declaration
                // (the previous code skipped one unconditionally); drop it only when present.
                String content = "" + teiHeader
                if (content.startsWith("<?xml")) {
                        int declarationEnd = content.indexOf("?>")
                        if (declarationEnd >= 0) {
                                content = content.substring(declarationEnd + 2)
                        }
                }
                return content
        }
152

    
153
        /**
         * Replaces the teiHeader element of an XML-TXM file with the given
         * XML content.
         *
         * The file is rewritten with a StaxIdentityParser into a temporary
         * file: everything from the start of the teiHeader element to its end
         * is dropped and {@code xmlteiHeaderContent} is written instead. The
         * original file is then moved to a "copy_" backup in the temporary
         * directory and the rewritten file takes its place. An existing
         * backup of the same name is overwritten. If the rewritten file cannot
         * be put in place, the backup is moved back so the text is not lost.
         *
         * @param txmFile the XML-TXM file to edit in place
         * @param xmlteiHeaderContent the XML written in place of the teiHeader element, encoded in UTF-8
         * @param tmpDirectory directory receiving the "tmp_" result and the "copy_" backup;
         *        defaults to the directory named by the "java.io.tmpdir" system property
         * @return true if the file was replaced, false otherwise
         */
        def injecting(File txmFile, String xmlteiHeaderContent, File tmpDirectory = new File(System.getProperty("java.io.tmpdir"))) {
                println "Editing: $txmFile..."
                StaxIdentityParser sparser = new StaxIdentityParser(txmFile) {
                                        // true while the parser is inside the teiHeader element being replaced
                                        boolean start = false;

                                        public void processStartElement() {
                                                if (localname == "teiHeader") {
                                                        start = true;
                                                        if (debug > 2) println "   replacing teiHeader"
                                                        if (debug > 3) println "       with $xmlteiHeaderContent"
                                                        output.write(xmlteiHeaderContent.getBytes(Charset.forName("UTF-8")));  // REPLACE CONTENT !
                                                }
                                                if (!start) {
                                                        super.processStartElement();
                                                }
                                        }

                                        public void processEndElement() {
                                                if (!start) {
                                                        super.processEndElement();
                                                }
                                                // the flag is reset after the test above so the end tag itself is dropped too
                                                if (localname == "teiHeader") {
                                                        start = false;
                                                        if (debug > 2) println "   replace done"
                                                }
                                        }

                                        public void processCharacters() {
                                                if (!start) {
                                                        super.processCharacters();
                                                }
                                        }

                                        public void processComment() {
                                                if (!start) {
                                                        super.processComment();
                                                }
                                        }
                                }

                if (!tmpDirectory.exists() && !tmpDirectory.mkdirs()) {
                        println " -> FAIL could not create the temporary directory $tmpDirectory"
                        return false
                }
                File outfile = new File(tmpDirectory, "tmp_"+txmFile.getName())
                File copyFile = new File(tmpDirectory, "copy_"+txmFile.getName())

                if (!sparser.process(outfile)) {
                        println " -> FAIL see $outfile"
                        return false
                }

                // Files.move also works when the temporary directory is on another
                // file system and when the target already exists, unlike File.renameTo.
                def replaceExisting = java.nio.file.StandardCopyOption.REPLACE_EXISTING

                try {
                        java.nio.file.Files.move(txmFile.toPath(), copyFile.toPath(), replaceExisting)
                } catch (IOException e) {
                        println " -> FAIL could not make a copy of $txmFile in $copyFile"
                        println " -> see result in $outfile"
                        println " -> cause: $e"
                        return false
                }

                try {
                        java.nio.file.Files.move(outfile.toPath(), txmFile.toPath(), replaceExisting)
                } catch (IOException e) {
                        println " -> FAIL could not replace $txmFile"
                        println " -> see result in $outfile"
                        println " -> cause: $e"
                        // put the original back so the corpus is not left without this text
                        try {
                                java.nio.file.Files.move(copyFile.toPath(), txmFile.toPath(), replaceExisting)
                        } catch (IOException restoreError) {
                                println " -> the original file could not be restored, see $copyFile"
                        }
                        return false
                }

                println " -> SUCCESS see $txmFile"
                return true
        }
226
}