Révision 2037
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/Metadata2TeiHeader.groovy (revision 2037) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.urs.democrat |
|
3 |
|
|
4 |
import java.nio.charset.Charset |
|
5 |
|
|
6 |
import org.kohsuke.args4j.* |
|
7 |
import groovy.transform.Field |
|
8 |
import net.sf.saxon.functions.IndexOf |
|
9 |
|
|
10 |
import org.txm.importer.StaxIdentityParser |
|
11 |
import org.txm.objects.* |
|
12 |
import org.txm.rcp.swt.widget.parameters.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.utils.CsvReader |
|
15 |
import org.txm.utils.io.IOUtils |
|
16 |
import org.txm.scripts.importer.XPathResult |
|
17 |
import javax.xml.xpath.XPathConstants |
|
18 |
|
|
19 |
class Metadata2TEiHeader { |
|
20 |
|
|
21 |
/**
 * Verbosity of the console traces. The methods of this class print extra
 * details when the value is greater than 1 (header building) and greater
 * than 2 or 3 (header replacement).
 */
int debug = 0

/**
 * Creates a teiHeader builder.
 *
 * @param debug the verbosity level, stored in the {@code debug} property
 */
Metadata2TEiHeader(int debug) {
	this.debug = debug
}
|
25 |
//@Field @Option(name="teiHeaderTemplateFile", usage="the default teiHeader of texts", widget="FileOpen", required=true, def="teiHeaderTemplateFile.xml") |
|
26 |
//def teiHeaderTemplateFile |
|
27 |
// |
|
28 |
//@Field @Option(name="xpathFile", usage="properties file to redirect metadata column to the teiHeader locations", widget="FileOpen", required=true, def="xpathFile.properties") |
|
29 |
//def xpathFile |
|
30 |
// |
|
31 |
//@Field @Option(name="metadataFile", usage="the TSV file containing the metadata values per text", widget="FileOpen", required=true, def="metadataFile.tsv") |
|
32 |
//def metadataFile |
|
33 |
// |
|
34 |
//@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
35 |
//debug |
|
36 |
// |
|
37 |
//if (!ParametersDialog.open(this)) return; |
|
38 |
//if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
39 |
// |
|
40 |
//def xpathProperties = new Properties() |
|
41 |
//xpathProperties.load(IOUtils.getReader(xpathFile)) |
|
42 |
//println xpathProperties |
|
43 |
// |
|
44 |
//def csvReader = new CsvReader(metadataFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8")) |
|
45 |
//println csvReader |
|
46 |
// |
|
47 |
//csvReader.readHeaders(); |
|
48 |
//def header = csvReader.getHeaders() |
|
49 |
//if (!header.contains("id")) { |
|
50 |
// println "** 'id' column not found in $metadataFile header=$header" |
|
51 |
// return; |
|
52 |
//} |
|
53 |
//if (!header.contains("corpus_id")) { |
|
54 |
// println "** 'corpus_id' column not found in $metadataFile header=$header" |
|
55 |
// return; |
|
56 |
//} |
|
57 |
// |
|
58 |
//def corpora = [:] |
|
59 |
//for (Project project : Workspace.getInstance().getProjects()) { |
|
60 |
// for (MainCorpus corpus : project.getChildren(MainCorpus.class)) { |
|
61 |
// corpora[corpus.getID()] = corpus; |
|
62 |
// } |
|
63 |
//} |
|
64 |
// |
|
65 |
//while (csvReader.readRecord()) { |
|
66 |
// String text_id = csvReader.get("id") |
|
67 |
// if (text_id == null || text_id.length() == 0) { |
|
68 |
// //println "** @id not found for record="+csvReader.getRawRecord() |
|
69 |
// continue; |
|
70 |
// } |
|
71 |
// |
|
72 |
// String corpus_id = csvReader.get("corpus_id") |
|
73 |
// if (corpus_id == null || corpus_id.length() == 0) { |
|
74 |
// println "** @corpus_id not found for record="+csvReader.getRawRecord() |
|
75 |
// continue; |
|
76 |
// } |
|
77 |
// |
|
78 |
// MainCorpus corpus = corpora[corpus_id] |
|
79 |
// if (corpus == null) { |
|
80 |
// println "** no corpus found for ID=$corpus_id" |
|
81 |
// continue; |
|
82 |
// } |
|
83 |
// def project = corpus.getProject() |
|
84 |
// |
|
85 |
// def txmDir = new File(project.getProjectDirectory(), "txm/"+corpus.getID()) |
|
86 |
// |
|
87 |
// if (!txmDir.exists()) { |
|
88 |
// println "** the selected corpus has no XML-TXM files. Aborting." |
|
89 |
// } |
|
90 |
// |
|
91 |
// |
|
92 |
// File txmFile = new File(txmDir, text_id+".xml") |
|
93 |
// if (txmFile.exists()) { |
|
94 |
// |
|
95 |
// def data = [:] |
|
96 |
// |
|
97 |
// for (def h : header) data[h] = csvReader.get(h) |
|
98 |
// |
|
99 |
// String xmlteiHeaderContent = getCustomizedTEiHeader(teiHeaderTemplateFile, data, xpathProperties); |
|
100 |
// if (xmlteiHeaderContent != null && xmlteiHeaderContent.length() > 0) { |
|
101 |
// injecting(txmFile, xmlteiHeaderContent) |
|
102 |
// } else { |
|
103 |
// println "** Text header not updated: $txmFile" |
|
104 |
// } |
|
105 |
// } else { |
|
106 |
// println "** Text not found: $txmFile" |
|
107 |
// } |
|
108 |
//} |
|
109 |
|
|
110 |
/**
 * Builds a customized teiHeader from a template by injecting metadata values.
 *
 * For each key of {@code data} that has an XPath registered in {@code xpathProperties},
 * the first node matching the XPath is patched:
 * - element: replaced by a new element of the same tag name holding the value as text
 *   and a copy of the attributes (former child nodes are not copied);
 * - attribute or text node: its value is overwritten.
 * When nothing matches, the parent path (everything before the last "/") is evaluated and,
 * if found, a new child element is appended to it. The attributes listed in the last step
 * predicate (written as {@code [@name="value",@other="value"]}) are set on the new element.
 * If the XPath ends with "/@name" the value is stored in that attribute, otherwise as text.
 *
 * @param teiHeaderTemplateFile the XML file containing the teiHeader template
 * @param data map of metadata name to metadata value
 * @param xpathProperties metadata name to XPath of the location to patch; metadata
 *        without an entry are ignored
 * @return the string form of the first "teiHeader" element found under the document element,
 *         without the leading XML declaration if there is one, or null when the template
 *         contains no such element. NOTE(review): relies on the DOM implementation
 *         serializing the node in toString() - confirm against XPathResult.
 */
def getCustomizedTEiHeader(File teiHeaderTemplateFile, def data, Properties xpathProperties) {
	XPathResult xpathProcessor = new XPathResult(teiHeaderTemplateFile);
	for (String info : data.keySet()) {
		String xpath = xpathProperties[info];
		String value = data[info]
		if (xpath == null) {
			continue; // not a data to inject
		}
		if (debug > 1) println " injecting '$info'='$value' in '$xpath'"

		def expr = xpathProcessor.xpath.compile(xpath);
		def first = expr.evaluate(xpathProcessor.doc.getDocumentElement(), XPathConstants.NODE);
		if (first != null) {
			switch (first.getNodeType()) {
				case 1: // element
					if (debug > 1) println " patching: $info with "+value
					def newChild = xpathProcessor.doc.createElement(first.getTagName())
					newChild.appendChild(xpathProcessor.doc.createTextNode(value))

					def attributes = first.getAttributes();
					for (int i = 0 ; i < attributes.getLength() ; i++) { // copy attributes
						def attr = attributes.item(i)
						newChild.setAttribute(attr.getNodeName(), attr.getNodeValue())
					}

					first.getParentNode().replaceChild(newChild, first)
					break;
				case 2: // attribute
					if (debug > 1) println " patching attribute: $info with "+value
					first.setNodeValue(value)
					break;
				case 3: // text
					if (debug > 1) println " patching text: $info with "+value
					first.setNodeValue(value)
					break;
				default:
					break
			}
		} else {
			// nothing matches: try finding the parent and add a new node
			String attribute_xpath = null;
			int attributeIdx = xpath.lastIndexOf("/@")
			if (attributeIdx > 0) {
				attribute_xpath = xpath.substring(attributeIdx + 2)
				xpath = xpath.substring(0, attributeIdx)
			}

			// a parent path is needed to create the node: without "/" (or with only a
			// leading one) there is nothing to evaluate, so the value is reported as not injected
			int slashIdx = xpath.lastIndexOf("/")
			String parent_xpath = ""
			String element_xpath = xpath
			def parent = null
			if (slashIdx > 0) {
				parent_xpath = xpath.substring(0, slashIdx)
				element_xpath = xpath.substring(slashIdx + 1)
				def parent_expr = xpathProcessor.xpath.compile(parent_xpath);
				parent = parent_expr.evaluate(xpathProcessor.doc.getDocumentElement(), XPathConstants.NODE);
			}

			if (parent != null) {
				// split the last step into its name and its optional [..] predicate first,
				// so that a ":" inside a predicate value is not mistaken for a prefix separator
				String nodename = element_xpath
				String attributesString = null
				int bracketIdx = element_xpath.indexOf("[")
				if (bracketIdx >= 0) {
					nodename = element_xpath.substring(0, bracketIdx)
					attributesString = element_xpath.substring(bracketIdx + 1, element_xpath.length() - 1)
				}
				// drop the optional "prefix:" (indexOf returns -1 when there is none, so +1 keeps the whole name)
				nodename = nodename.substring(nodename.indexOf(":") + 1)

				def attributes = [:]
				if (attributesString != null) {
					for (String attributeString : attributesString.split(",")) {
						def split = attributeString.split("=")
						attributes[split[0].substring(1)] = split[1].substring(1, split[1].length()-1) // remove @ and remove ""
					}
				}

				def newChild = xpathProcessor.doc.createElement(nodename)
				for (String attributeName : attributes.keySet()) {
					newChild.setAttribute(attributeName, attributes[attributeName])
				}

				if (attribute_xpath != null) {
					newChild.setAttribute(attribute_xpath, value)
				} else {
					newChild.appendChild(xpathProcessor.doc.createTextNode(value))
				}

				if (debug > 1) println " creating node $parent_xpath / $nodename[$attributes] : $attribute_xpath = $value"
				parent.appendChild(newChild)
			} else {
				println " ** info=$info not found or created for "+xpathProperties[info]
			}
		}
	}

	def teiHeader = xpathProcessor.doc.getDocumentElement().getElementsByTagName("teiHeader").item(0)
	if (teiHeader == null) {
		println " ** no teiHeader element found in $teiHeaderTemplateFile"
		return null
	}

	String content = ""+teiHeader
	// only strip the XML declaration when it is really there
	String declaration = '<?xml version="1.0" encoding="UTF-8"?>'
	if (content.startsWith(declaration)) {
		content = content.substring(declaration.length())
	}
	return content
}
|
196 |
|
|
197 |
/**
 * Replaces the teiHeader element of an XML-TXM file.
 *
 * The file is streamed through a StaxIdentityParser: when the "teiHeader" start tag is met,
 * {@code xmlteiHeaderContent} is written instead, and the start elements, end elements,
 * characters and comments of the former header are not copied. The result is written to
 * "tmp_&lt;name&gt;" in {@code tmpDirectory}; the original file is then backed up as
 * "copy_&lt;name&gt;" in the same directory and replaced by the result.
 *
 * @param txmFile the XML-TXM file to update
 * @param xmlteiHeaderContent the XML text written in place of the teiHeader element
 * @param tmpDirectory the directory receiving the intermediate result and the backup copy,
 *        defaults to the directory given by the "java.io.tmpdir" system property
 * @return true if the file has been replaced, false if the parsing, the backup or
 *         the replacement failed (a message is printed in each case)
 */
def replaceHeader(File txmFile, String xmlteiHeaderContent, File tmpDirectory = new File(System.getProperty("java.io.tmpdir"))) {
	println " editing: $txmFile..."
	StaxIdentityParser sparser = new StaxIdentityParser(txmFile) {
		// true while the parser is inside the teiHeader element being replaced
		boolean start = false;

		public void processStartElement() {

			if (localname == "teiHeader") {
				start = true;
				if (debug > 2) println " replacing teiHeader"
				if (debug > 3) println " with $xmlteiHeaderContent"
				output.write(xmlteiHeaderContent.getBytes(Charset.forName("UTF-8"))); // REPLACE CONTENT !
			}
			if (!start) {
				super.processStartElement();
			}
		}

		public void processEndElement() {
			if (!start) {
				super.processEndElement();
			}
			// tested after the copy so that the former </teiHeader> tag itself is not written
			if (localname == "teiHeader") {
				start = false;
				if (debug > 2) println " replace done"
			}
		}

		public void processCharacters() {
			if (!start) {
				super.processCharacters();
			}
		}

		public void processComment() {
			if (!start) {
				super.processComment();
			}
		}
	}

	tmpDirectory.mkdirs() // if this fails the parser cannot write its result and the failure is reported below
	File outfile = new File(tmpDirectory, "tmp_"+txmFile.getName())
	File copyFile = new File(tmpDirectory, "copy_"+txmFile.getName())

	if (!sparser.process(outfile)) {
		println " -> FAIL see $outfile"
		return false;
	}

	// The backup is a copy (not a rename) so the original stays in place until the result
	// is ready, and java.nio.file.Files works when the temporary directory is on another
	// file system than txmFile, which File.renameTo does not guarantee.
	try {
		java.nio.file.Files.copy(txmFile.toPath(), copyFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
	} catch (IOException e) {
		println " -> FAIL could not make a copy of $txmFile in $copyFile"
		println " -> see result in $outfile"
		return false;
	}

	try {
		java.nio.file.Files.move(outfile.toPath(), txmFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
	} catch (IOException e) {
		// the previous content of txmFile is still available in copyFile
		println " -> FAIL could not replace $txmFile"
		println " -> see result in $outfile"
		return false;
	}

	println " -> SUCCESS see $txmFile"
	return true
}
|
270 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/Metadata2TEIHeader.groovy (revision 2037) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.urs.democrat |
|
3 |
|
|
4 |
import java.nio.charset.Charset |
|
5 |
|
|
6 |
import org.kohsuke.args4j.* |
|
7 |
import groovy.transform.Field |
|
8 |
import net.sf.saxon.functions.IndexOf |
|
9 |
|
|
10 |
import org.txm.importer.StaxIdentityParser |
|
11 |
import org.txm.objects.* |
|
12 |
import org.txm.rcp.swt.widget.parameters.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.utils.CsvReader |
|
15 |
import org.txm.utils.io.IOUtils |
|
16 |
import org.txm.scripts.importer.XPathResult |
|
17 |
import javax.xml.xpath.XPathConstants |
|
18 |
|
|
19 |
class Metadata2TEIHeader { |
|
20 |
|
|
21 |
/**
 * Verbosity of the console traces. The methods of this class print extra
 * details when the value is greater than 1 (header building) and greater
 * than 2 or 3 (header replacement).
 */
int debug = 0

/**
 * Creates a teiHeader builder.
 *
 * @param debug the verbosity level, stored in the {@code debug} property
 */
Metadata2TEIHeader(int debug) {
	this.debug = debug
}
|
25 |
//@Field @Option(name="teiHeaderTemplateFile", usage="the default teiHeader of texts", widget="FileOpen", required=true, def="teiHeaderTemplateFile.xml") |
|
26 |
//def teiHeaderTemplateFile |
|
27 |
// |
|
28 |
//@Field @Option(name="xpathFile", usage="properties file to redirect metadata column to the teiHeader locations", widget="FileOpen", required=true, def="xpathFile.properties") |
|
29 |
//def xpathFile |
|
30 |
// |
|
31 |
//@Field @Option(name="metadataFile", usage="the TSV file containing the metadata values per text", widget="FileOpen", required=true, def="metadataFile.tsv") |
|
32 |
//def metadataFile |
|
33 |
// |
|
34 |
//@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
35 |
//debug |
|
36 |
// |
|
37 |
//if (!ParametersDialog.open(this)) return; |
|
38 |
//if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
39 |
// |
|
40 |
//def xpathProperties = new Properties() |
|
41 |
//xpathProperties.load(IOUtils.getReader(xpathFile)) |
|
42 |
//println xpathProperties |
|
43 |
// |
|
44 |
//def csvReader = new CsvReader(metadataFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8")) |
|
45 |
//println csvReader |
|
46 |
// |
|
47 |
//csvReader.readHeaders(); |
|
48 |
//def header = csvReader.getHeaders() |
|
49 |
//if (!header.contains("id")) { |
|
50 |
// println "** 'id' column not found in $metadataFile header=$header" |
|
51 |
// return; |
|
52 |
//} |
|
53 |
//if (!header.contains("corpus_id")) { |
|
54 |
// println "** 'corpus_id' column not found in $metadataFile header=$header" |
|
55 |
// return; |
|
56 |
//} |
|
57 |
// |
|
58 |
//def corpora = [:] |
|
59 |
//for (Project project : Workspace.getInstance().getProjects()) { |
|
60 |
// for (MainCorpus corpus : project.getChildren(MainCorpus.class)) { |
|
61 |
// corpora[corpus.getID()] = corpus; |
|
62 |
// } |
|
63 |
//} |
|
64 |
// |
|
65 |
//while (csvReader.readRecord()) { |
|
66 |
// String text_id = csvReader.get("id") |
|
67 |
// if (text_id == null || text_id.length() == 0) { |
|
68 |
// //println "** @id not found for record="+csvReader.getRawRecord() |
|
69 |
// continue; |
|
70 |
// } |
|
71 |
// |
|
72 |
// String corpus_id = csvReader.get("corpus_id") |
|
73 |
// if (corpus_id == null || corpus_id.length() == 0) { |
|
74 |
// println "** @corpus_id not found for record="+csvReader.getRawRecord() |
|
75 |
// continue; |
|
76 |
// } |
|
77 |
// |
|
78 |
// MainCorpus corpus = corpora[corpus_id] |
|
79 |
// if (corpus == null) { |
|
80 |
// println "** no corpus found for ID=$corpus_id" |
|
81 |
// continue; |
|
82 |
// } |
|
83 |
// def project = corpus.getProject() |
|
84 |
// |
|
85 |
// def txmDir = new File(project.getProjectDirectory(), "txm/"+corpus.getID()) |
|
86 |
// |
|
87 |
// if (!txmDir.exists()) { |
|
88 |
// println "** the selected corpus has no XML-TXM files. Aborting." |
|
89 |
// } |
|
90 |
// |
|
91 |
// |
|
92 |
// File txmFile = new File(txmDir, text_id+".xml") |
|
93 |
// if (txmFile.exists()) { |
|
94 |
// |
|
95 |
// def data = [:] |
|
96 |
// |
|
97 |
// for (def h : header) data[h] = csvReader.get(h) |
|
98 |
// |
|
99 |
// String xmlteiHeaderContent = getCustomizedTEiHeader(teiHeaderTemplateFile, data, xpathProperties); |
|
100 |
// if (xmlteiHeaderContent != null && xmlteiHeaderContent.length() > 0) { |
|
101 |
// injecting(txmFile, xmlteiHeaderContent) |
|
102 |
// } else { |
|
103 |
// println "** Text header not updated: $txmFile" |
|
104 |
// } |
|
105 |
// } else { |
|
106 |
// println "** Text not found: $txmFile" |
|
107 |
// } |
|
108 |
//} |
|
109 |
|
|
110 |
/**
 * Builds a customized teiHeader from a template by injecting metadata values.
 *
 * For each key of {@code data} that has an XPath registered in {@code xpathProperties},
 * the first node matching the XPath is patched:
 * - element: replaced by a new element of the same tag name holding the value as text
 *   and a copy of the attributes (former child nodes are not copied);
 * - attribute or text node: its value is overwritten.
 * When nothing matches, the parent path (everything before the last "/") is evaluated and,
 * if found, a new child element is appended to it. The attributes listed in the last step
 * predicate (written as {@code [@name="value",@other="value"]}) are set on the new element.
 * If the XPath ends with "/@name" the value is stored in that attribute, otherwise as text.
 *
 * @param teiHeaderTemplateFile the XML file containing the teiHeader template
 * @param data map of metadata name to metadata value
 * @param xpathProperties metadata name to XPath of the location to patch; metadata
 *        without an entry are ignored
 * @return the string form of the first "teiHeader" element found under the document element,
 *         without the leading XML declaration if there is one, or null when the template
 *         contains no such element. NOTE(review): relies on the DOM implementation
 *         serializing the node in toString() - confirm against XPathResult.
 */
def getCustomizedTEiHeader(File teiHeaderTemplateFile, def data, Properties xpathProperties) {
	XPathResult xpathProcessor = new XPathResult(teiHeaderTemplateFile);
	for (String info : data.keySet()) {
		String xpath = xpathProperties[info];
		String value = data[info]
		if (xpath == null) {
			continue; // not a data to inject
		}
		if (debug > 1) println " injecting '$info'='$value' in '$xpath'"

		def expr = xpathProcessor.xpath.compile(xpath);
		def first = expr.evaluate(xpathProcessor.doc.getDocumentElement(), XPathConstants.NODE);
		if (first != null) {
			switch (first.getNodeType()) {
				case 1: // element
					if (debug > 1) println " patching: $info with "+value
					def newChild = xpathProcessor.doc.createElement(first.getTagName())
					newChild.appendChild(xpathProcessor.doc.createTextNode(value))

					def attributes = first.getAttributes();
					for (int i = 0 ; i < attributes.getLength() ; i++) { // copy attributes
						def attr = attributes.item(i)
						newChild.setAttribute(attr.getNodeName(), attr.getNodeValue())
					}

					first.getParentNode().replaceChild(newChild, first)
					break;
				case 2: // attribute
					if (debug > 1) println " patching attribute: $info with "+value
					first.setNodeValue(value)
					break;
				case 3: // text
					if (debug > 1) println " patching text: $info with "+value
					first.setNodeValue(value)
					break;
				default:
					break
			}
		} else {
			// nothing matches: try finding the parent and add a new node
			String attribute_xpath = null;
			int attributeIdx = xpath.lastIndexOf("/@")
			if (attributeIdx > 0) {
				attribute_xpath = xpath.substring(attributeIdx + 2)
				xpath = xpath.substring(0, attributeIdx)
			}

			// a parent path is needed to create the node: without "/" (or with only a
			// leading one) there is nothing to evaluate, so the value is reported as not injected
			int slashIdx = xpath.lastIndexOf("/")
			String parent_xpath = ""
			String element_xpath = xpath
			def parent = null
			if (slashIdx > 0) {
				parent_xpath = xpath.substring(0, slashIdx)
				element_xpath = xpath.substring(slashIdx + 1)
				def parent_expr = xpathProcessor.xpath.compile(parent_xpath);
				parent = parent_expr.evaluate(xpathProcessor.doc.getDocumentElement(), XPathConstants.NODE);
			}

			if (parent != null) {
				// split the last step into its name and its optional [..] predicate first,
				// so that a ":" inside a predicate value is not mistaken for a prefix separator
				String nodename = element_xpath
				String attributesString = null
				int bracketIdx = element_xpath.indexOf("[")
				if (bracketIdx >= 0) {
					nodename = element_xpath.substring(0, bracketIdx)
					attributesString = element_xpath.substring(bracketIdx + 1, element_xpath.length() - 1)
				}
				// drop the optional "prefix:" (indexOf returns -1 when there is none, so +1 keeps the whole name)
				nodename = nodename.substring(nodename.indexOf(":") + 1)

				def attributes = [:]
				if (attributesString != null) {
					for (String attributeString : attributesString.split(",")) {
						def split = attributeString.split("=")
						attributes[split[0].substring(1)] = split[1].substring(1, split[1].length()-1) // remove @ and remove ""
					}
				}

				def newChild = xpathProcessor.doc.createElement(nodename)
				for (String attributeName : attributes.keySet()) {
					newChild.setAttribute(attributeName, attributes[attributeName])
				}

				if (attribute_xpath != null) {
					newChild.setAttribute(attribute_xpath, value)
				} else {
					newChild.appendChild(xpathProcessor.doc.createTextNode(value))
				}

				if (debug > 1) println " creating node $parent_xpath / $nodename[$attributes] : $attribute_xpath = $value"
				parent.appendChild(newChild)
			} else {
				println " ** info=$info not found or created for "+xpathProperties[info]
			}
		}
	}

	def teiHeader = xpathProcessor.doc.getDocumentElement().getElementsByTagName("teiHeader").item(0)
	if (teiHeader == null) {
		println " ** no teiHeader element found in $teiHeaderTemplateFile"
		return null
	}

	String content = ""+teiHeader
	// only strip the XML declaration when it is really there
	String declaration = '<?xml version="1.0" encoding="UTF-8"?>'
	if (content.startsWith(declaration)) {
		content = content.substring(declaration.length())
	}
	return content
}
|
196 |
|
|
197 |
/**
 * Replaces the teiHeader element of an XML-TXM file.
 *
 * The file is streamed through a StaxIdentityParser: when the "teiHeader" start tag is met,
 * {@code xmlteiHeaderContent} is written instead, and the start elements, end elements,
 * characters and comments of the former header are not copied. The result is written to
 * "tmp_&lt;name&gt;" in {@code tmpDirectory}; the original file is then backed up as
 * "copy_&lt;name&gt;" in the same directory and replaced by the result.
 *
 * @param txmFile the XML-TXM file to update
 * @param xmlteiHeaderContent the XML text written in place of the teiHeader element
 * @param tmpDirectory the directory receiving the intermediate result and the backup copy,
 *        defaults to the directory given by the "java.io.tmpdir" system property
 * @return true if the file has been replaced, false if the parsing, the backup or
 *         the replacement failed (a message is printed in each case)
 */
def replaceHeader(File txmFile, String xmlteiHeaderContent, File tmpDirectory = new File(System.getProperty("java.io.tmpdir"))) {
	println " editing: $txmFile..."
	StaxIdentityParser sparser = new StaxIdentityParser(txmFile) {
		// true while the parser is inside the teiHeader element being replaced
		boolean start = false;

		public void processStartElement() {

			if (localname == "teiHeader") {
				start = true;
				if (debug > 2) println " replacing teiHeader"
				if (debug > 3) println " with $xmlteiHeaderContent"
				output.write(xmlteiHeaderContent.getBytes(Charset.forName("UTF-8"))); // REPLACE CONTENT !
			}
			if (!start) {
				super.processStartElement();
			}
		}

		public void processEndElement() {
			if (!start) {
				super.processEndElement();
			}
			// tested after the copy so that the former </teiHeader> tag itself is not written
			if (localname == "teiHeader") {
				start = false;
				if (debug > 2) println " replace done"
			}
		}

		public void processCharacters() {
			if (!start) {
				super.processCharacters();
			}
		}

		public void processComment() {
			if (!start) {
				super.processComment();
			}
		}
	}

	tmpDirectory.mkdirs() // if this fails the parser cannot write its result and the failure is reported below
	File outfile = new File(tmpDirectory, "tmp_"+txmFile.getName())
	File copyFile = new File(tmpDirectory, "copy_"+txmFile.getName())

	if (!sparser.process(outfile)) {
		println " -> FAIL see $outfile"
		return false;
	}

	// The backup is a copy (not a rename) so the original stays in place until the result
	// is ready, and java.nio.file.Files works when the temporary directory is on another
	// file system than txmFile, which File.renameTo does not guarantee.
	try {
		java.nio.file.Files.copy(txmFile.toPath(), copyFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
	} catch (IOException e) {
		println " -> FAIL could not make a copy of $txmFile in $copyFile"
		println " -> see result in $outfile"
		return false;
	}

	try {
		java.nio.file.Files.move(outfile.toPath(), txmFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
	} catch (IOException e) {
		// the previous content of txmFile is still available in copyFile
		println " -> FAIL could not replace $txmFile"
		println " -> see result in $outfile"
		return false;
	}

	println " -> SUCCESS see $txmFile"
	return true
}
|
270 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/BuildDemocratCorpusVersion1Macro.groovy (revision 2037) | ||
---|---|---|
150 | 150 |
println " compute measures..." |
151 | 151 |
def data = new ComputeDemocratMeasureMetadata().getStats(corpus, xpathProperties) |
152 | 152 |
|
153 |
Metadata2TEiHeader mthm = new Metadata2TEiHeader(debug);
|
|
153 |
Metadata2TEIHeader mthm = new Metadata2TEIHeader(debug);
|
|
154 | 154 |
for (def h : header) data[h] = csvReader.get(h) |
155 | 155 |
|
156 | 156 |
println " creating teiHeader..." |
Formats disponibles : Unified diff