Révision 2029

tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/Metadata2TeiHeaderMacro.groovy (revision 2029)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.urs.democrat
3

  
4
import java.nio.charset.Charset
5

  
6
import org.kohsuke.args4j.*
7
import groovy.transform.Field
8

  
9
import org.txm.importer.StaxIdentityParser
10
import org.txm.objects.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.searchengine.cqp.corpus.*
13
import org.txm.utils.CsvReader
14
import org.txm.utils.io.IOUtils
15
import org.txm.scripts.importer.XPathResult
16
import javax.xml.xpath.XPathConstants
17

  
18
class Metadata2TEiHeaderMacro {
19
	
20
	/** Verbosity level; the methods of this class print more details as it grows. */
	int debug = 0;

	/**
	 * Creates a header builder with the given verbosity.
	 *
	 * @param debug the verbosity level tested by the methods of this object before printing details
	 */
	public Metadata2TEiHeaderMacro(int debug) {
		this.debug = debug
	}
24
	//@Field @Option(name="teiHeaderTemplateFile", usage="the default teiHeader of texts", widget="FileOpen", required=true, def="teiHeaderTemplateFile.xml")
25
	//def teiHeaderTemplateFile
26
	//
27
	//@Field @Option(name="xpathFile", usage="properties file to redirect metadata column to the teiHeader locations", widget="FileOpen", required=true, def="xpathFile.properties")
28
	//def xpathFile
29
	//
30
	//@Field @Option(name="metadataFile", usage="the TSV file containing the metadata values per text", widget="FileOpen", required=true, def="metadataFile.tsv")
31
	//def metadataFile
32
	//
33
	//@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
34
	//debug
35
	//
36
	//if (!ParametersDialog.open(this)) return;
37
	//if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
38
	//
39
	//
40
	//
41
	//def xpathProperties = new Properties()
42
	//xpathProperties.load(IOUtils.getReader(xpathFile))
43
	//println xpathProperties
44
	//
45
	//def csvReader = new CsvReader(metadataFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8"))
46
	//println csvReader
47
	//
48
	//csvReader.readHeaders();
49
	//def header = csvReader.getHeaders()
50
	//if (!header.contains("id")) {
51
	//	println "** 'id' column not found in $metadataFile header=$header"
52
	//	return;
53
	//}
54
	//if (!header.contains("corpus_id")) {
55
	//	println "** 'corpus_id' column not found in $metadataFile header=$header"
56
	//	return;
57
	//}
58
	//
59
	//def corpora = [:]
60
	//for (Project project : Workspace.getInstance().getProjects()) {
61
	//	for (MainCorpus corpus : project.getChildren(MainCorpus.class)) {
62
	//		corpora[corpus.getID()] = corpus;
63
	//	}
64
	//}
65
	//
66
	//while (csvReader.readRecord())	{
67
	//	String text_id = csvReader.get("id")
68
	//	if (text_id == null || text_id.length() == 0) {
69
	//		//println "** @id not found for record="+csvReader.getRawRecord()
70
	//		continue;
71
	//	}
72
	//
73
	//	String corpus_id = csvReader.get("corpus_id")
74
	//	if (corpus_id == null || corpus_id.length() == 0) {
75
	//		println "** @corpus_id not found for record="+csvReader.getRawRecord()
76
	//		continue;
77
	//	}
78
	//
79
	//	MainCorpus corpus = corpora[corpus_id]
80
	//	if (corpus == null) {
81
	//		println "** no corpus found for ID=$corpus_id"
82
	//		continue;
83
	//	}
84
	//	def project = corpus.getProject()
85
	//
86
	//	def txmDir = new File(project.getProjectDirectory(), "txm/"+corpus.getID())
87
	//
88
	//	if (!txmDir.exists()) {
89
	//		println "** the selected corpus has no XML-TXM files. Aborting."
90
	//	}
91
	//
92
	//
93
	//	File txmFile = new File(txmDir, text_id+".xml")
94
	//	if (txmFile.exists()) {
95
	//
96
	//		def data = [:]
97
	//
98
	//		for (def h : header) data[h] = csvReader.get(h)
99
	//
100
	//		String xmlteiHeaderContent = getCustomizedTEiHeader(teiHeaderTemplateFile, data, xpathProperties);
101
	//		if (xmlteiHeaderContent != null && xmlteiHeaderContent.length() > 0) {
102
	//			injecting(txmFile, xmlteiHeaderContent)
103
	//		} else {
104
	//			println "** Text header not updated: $txmFile"
105
	//		}
106
	//	} else {
107
	//		println "** Text not found: $txmFile"
108
	//	}
109
	//}
110

  
111
	/**
	 * Builds the teiHeader of one text from a template file and the metadata values of the text.
	 *
	 * For each key of data that has an entry in xpathProperties, the XPath is evaluated on the
	 * template document and the first matching node receives the value: an element gets the
	 * value as its only text content, an attribute or a text node gets the value as node value.
	 * Keys without XPath are ignored; XPaths matching nothing are reported and skipped.
	 *
	 * @param teiHeaderTemplateFile the XML file containing the teiHeader template
	 * @param data map of metadata name -> value
	 * @param xpathProperties map of metadata name -> XPath of the node to patch in the template
	 * @return the serialized teiHeader element without XML declaration, or null if the template has no teiHeader element
	 */
	def getCustomizedTEiHeader(File teiHeaderTemplateFile, def data, Properties xpathProperties) {
		XPathResult xpathProcessor = new XPathResult(teiHeaderTemplateFile);
		for (String info : data.keySet()) {
			String xpath = xpathProperties[info];
			if (xpath == null) {
				continue; // not a data to inject
			}
			String value = data[info]
			if (value == null) value = "" // never hand a null value to the DOM
			if (debug > 1) println "   injecting '$info'='$value' in '$xpath'"

			def expr = xpathProcessor.xpath.compile(xpath);
			def first = expr.evaluate(xpathProcessor.doc.getDocumentElement(), XPathConstants.NODE);
			if (first == null) {
				println "** not found $xpath"
				continue;
			}

			switch (first.getNodeType()) {
				case 1: // element
					if (debug > 1) println "   patching: $info with "+value
					// patch the element in place: its attributes and namespace are kept,
					// only its content is replaced with the value
					first.setTextContent(value)
					break;
				case 2: // attribute
					if (debug > 1) println "   patching attribute: $info with "+value
					first.setNodeValue(value)
					break;
				case 3: // text
					if (debug > 1) println "   patching text: $info with "+value
					first.setNodeValue(value)
					break;
				default:
					break
			}
		}

		def header = xpathProcessor.doc.getDocumentElement().getElementsByTagName("teiHeader").item(0)
		if (header == null) {
			println "** no teiHeader element found in $teiHeaderTemplateFile"
			return null
		}

		// the serialized form is expected to start with an XML declaration, which must not
		// be injected in the middle of the target file
		String content = ""+header
		String xmlDeclaration = '<?xml version="1.0" encoding="UTF-8"?>'
		if (content.startsWith(xmlDeclaration)) {
			content = content.substring(xmlDeclaration.length())
		}
		return content
	}
152

  
153
	/**
	 * Replaces the teiHeader element of an XML-TXM file with the given content.
	 *
	 * The file is copied through a StaxIdentityParser into a "tmp_" working file, the original
	 * teiHeader being swapped for xmlteiHeaderContent. The original file is then moved to a
	 * "copy_" backup and the working file takes its place. If that last step fails, the backup
	 * is moved back so that the original file is not lost.
	 *
	 * @param txmFile the XML-TXM file to edit
	 * @param xmlteiHeaderContent the serialized teiHeader element written in place of the existing one
	 * @param tmpDirectory directory receiving the "tmp_" and "copy_" files; defaults to the JVM temporary directory
	 * @return true if txmFile now contains the new header, false otherwise
	 */
	def injecting(File txmFile, String xmlteiHeaderContent, File tmpDirectory = new File(System.getProperty("java.io.tmpdir"))) {
		println "Editing: $txmFile..."
		StaxIdentityParser sparser = new StaxIdentityParser(txmFile) {
					// true while the parser is inside the original teiHeader: nothing is copied meanwhile
					boolean start = false;

					public void processStartElement() {
						if (localname == "teiHeader") {
							start = true;
							if (debug > 2) println "   replacing teiHeader"
							if (debug > 3) println "       with $xmlteiHeaderContent"
							output.write(xmlteiHeaderContent.getBytes(Charset.forName("UTF-8")));  // REPLACE CONTENT !
						}
						if (!start) {
							super.processStartElement();
						}
					}

					public void processEndElement() {
						if (!start) {
							super.processEndElement();
						}
						// tested after the copy so that the closing tag of the old header is dropped too
						if (localname == "teiHeader") {
							start = false;
							if (debug > 2) println "   replace done"
						}
					}

					public void processCharacters() {
						if (!start) {
							super.processCharacters();
						}
					}

					public void processComment() {
						if (!start) {
							super.processComment();
						}
					}
				}

		tmpDirectory.mkdirs()
		File outfile = new File(tmpDirectory, "tmp_"+txmFile.getName())
		File copyFile = new File(tmpDirectory, "copy_"+txmFile.getName())

		// Files.move also works when source and target are on different file systems,
		// which File.renameTo does not guarantee
		def move = { File from, File to ->
			try {
				java.nio.file.Files.move(from.toPath(), to.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
				return true
			} catch (IOException e) {
				println " ** could not move $from to $to: $e"
				return false
			}
		}

		if (!sparser.process(outfile)) {
			println " -> FAIL see $outfile"
			return false;
		}

		if (!move(txmFile, copyFile)) {
			println " -> FAIL could not make a copy of $txmFile in $copyFile"
			println " -> see result in $outfile"
			return false;
		}

		if (!move(outfile, txmFile)) {
			println " -> FAIL could not replace $txmFile"
			println " -> see result in $outfile"
			// put the original file back so that the corpus does not lose a text
			if (!move(copyFile, txmFile)) {
				println " ** could not restore $txmFile, the original content is in $copyFile"
			}
			return false;
		}

		println " -> SUCCESS see $txmFile"
		return true
	}
226
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/ComputeDemocratMeasureMetadataMacro.groovy (revision 2029)
1
package org.txm.macro.urs.democrat
2

  
3
import org.txm.searchengine.cqp.corpus.MainCorpus
4
import org.txm.searchengine.cqp.corpus.query.CQLQuery
5
import visuAnalec.donnees.Corpus
6

  
7
/**
 * Computes the measures of a corpus from its CQP index and its URS annotations.
 */
class ComputeDemocratMeasureMetadataMacro {

	/**
	 * Computes the measures of a corpus.
	 *
	 * The returned map contains, in this order:
	 * "number of words" (number of matches of the CQL query "[]"),
	 * "number of coreference chains" (number of "CHAINE" schemas),
	 * "number of referring chains" (number of "CHAINE" schemas with at least 3 underlying units),
	 * "number of referring expressions" (number of "MENTION" units).
	 *
	 * @param corpus the corpus to measure
	 * @return the map of measure name -> value
	 */
	def getStats(MainCorpus corpus) {
		def data = [:]

		data["number of words"] = corpus.query(new CQLQuery("[]"), "TMP", false).getNMatch();

		// URSCorpora is not imported by this file: it is reached through its fully qualified name
		Corpus analecCorpus = org.txm.annotation.urs.URSCorpora.getCorpus(corpus);
		def chaines = analecCorpus.getSchemas("CHAINE");
		def refchaines = [];
		for (def chaine : chaines) {
			if (chaine.getUnitesSousjacentes().length >= 3) {
				refchaines << chaine
			}
		}
		data["number of coreference chains"] = chaines.size()
		data["number of referring chains"] = refchaines.size()
		data["number of referring expressions"] = analecCorpus.getUnites("MENTION").size()

		// explicit return: without it the value of the last assignment would be returned
		return data
	}

	/**
	 * Placeholder, does nothing yet.
	 *
	 * @param corpus unused
	 */
	def updateTeiHeader(MainCorpus corpus) {

	}
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/ComputeDemocratMeasureMetadata.groovy (revision 2029)
1
package org.txm.macro.urs.democrat
2

  
3
import org.txm.searchengine.cqp.corpus.MainCorpus
4
import org.txm.searchengine.cqp.corpus.query.CQLQuery
5
import visuAnalec.donnees.Corpus
6
import org.txm.annotation.urs.URSCorpora
7

  
8
/**
 * Computes the measures of a corpus from its CQP index and its URS annotations.
 */
class ComputeDemocratMeasureMetadata {

	/**
	 * Counts the words, the "CHAINE" schemas, the "CHAINE" schemas having at least
	 * 3 underlying units and the "MENTION" units of a corpus.
	 *
	 * @param corpus the corpus to measure
	 * @return the map of measure name -> value, in the order listed above
	 */
	def getStats(MainCorpus corpus) {
		// every token matches the empty CQL query
		def wordCount = corpus.query(new CQLQuery("[]"), "TMP", false).getNMatch()

		Corpus ursCorpus = URSCorpora.getCorpus(corpus)
		def allChains = ursCorpus.getSchemas("CHAINE")
		def longChains = allChains.findAll { it.getUnitesSousjacentes().length >= 3 }

		def stats = [:]
		stats.put("number of words", wordCount)
		stats.put("number of coreference chains", allChains.size())
		stats.put("number of referring chains", longChains.size())
		stats.put("number of referring expressions", ursCorpus.getUnites("MENTION").size())
		return stats
	}
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/BuildDemocratCorpusVersion1Macro.groovy (revision 2029)
66 66

  
67 67
def xpathProperties = new Properties()
68 68
xpathProperties.load(IOUtils.getReader(xpathFile))
69
println xpathProperties
69
// add default xpaths for measures
70
for (def s : ["number of words", "number of coreference chains", "number of referring chains", "number of referring expressions"]) {
71
	if (!xpathProperties.containsKey(s)) {
72
		xpathProperties.put(s,"/tei:TEI/tei:teiHeader/tei:fileDesc/tei:extent/tei:measure[@unit=\"$s\"]/@quantity")
73
	}
74
}
70 75

  
76
println "xpath properties: "+xpathProperties.keySet()
77

  
71 78
def csvReader = new CsvReader(metadataFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8"))
72
println csvReader
73 79

  
74 80
csvReader.readHeaders();
75 81
def header = csvReader.getHeaders()
76
if (!header.contains("id")) {
82
// the text identifiers are read from the 'text_id' column: stop if the metadata file has none
if (!header.contains("text_id")) {
	println "** 'text_id' column not found in $metadataFile header=$header"
	return;
}
......
90 96
}
91 97

  
92 98
while (csvReader.readRecord())	{
93
	String text_id = csvReader.get("id")
99
	String text_id = csvReader.get("text_id")
94 100
	if (text_id == null || text_id.length() == 0) {
95 101
		println "** @id not found for record="+csvReader.getRawRecord()
96 102
		continue;
97 103
	}
104
	println "text: "+text_id
98 105
	
99 106
	String corpus_id = csvReader.get("corpus_id")
100 107
	if (corpus_id == null || corpus_id.length() == 0) {
101
		println "** @corpus_id not found for record="+csvReader.getRawRecord()
102
		continue;
108
		println " ** @corpus_id <- @text_id=$text_id"
109
		corpus_id = text_id;
103 110
	}
104 111
	
105 112
	MainCorpus corpus = corpora[corpus_id]
......
107 114
	if (corpus == null) {
108 115
		File binCorpusFile = new File(inputDirectory, corpus_id+".txm")
109 116
		if (!binCorpusFile.exists()) {
110
			println "** no corpus binary file found for ID=$corpus_id : $binCorpusFile"
117
			println " ** no corpus binary file found for ID=$corpus_id : $binCorpusFile"
111 118
			continue;
112 119
		}
113
		println "CALL load corpus from $binCorpusFile"
120
		println " loading corpus from $binCorpusFile..."
114 121
		JobHandler job = LoadBinaryCorpus.loadBinaryCorpusArchive(binCorpusFile)
115 122
		job.join();
116 123
		project = job.getResultObject();
117 124
		corpus = project.getCorpusBuild(corpus_id);
118 125
		corpora[corpus.getID()] = corpus;
119 126
	} else {
120
		println "USING loaded corpus: "+corpus
127
		println " using loaded corpus: "+corpus
121 128
		project = corpus.getProject()
122 129
	}
123 130
	
124 131
	def txmDir = new File(project.getProjectDirectory(), "txm/"+corpus.getID())
125 132
	
126 133
	if (!txmDir.exists()) {
127
		println "** the selected corpus has no XML-TXM files. Aborting."
134
		println " ** the selected corpus has no XML-TXM files. Aborting."
128 135
		continue;
129 136
	}
130 137
	
131 138
	File txmFile = new File(txmDir, text_id+".xml")
132 139
	if (!txmFile.exists()) {
133
		println "** the selected corpus has no XML-TXM file: $txmFile"
140
		println " ** the selected corpus has no XML-TXM file: $txmFile"
134 141
		continue;
135 142
	}
136 143
	
137
	println "CALL Metadata2TeiHeaderMacro"
138
	Metadata2TEiHeaderMacro mthm = new Metadata2TEiHeaderMacro(debug);
144
	
145
	println " compute measures..."
146
	def data = new ComputeDemocratMeasureMetadata().getStats(corpus)
147
	
148
	Metadata2TEiHeader mthm = new Metadata2TEiHeader(debug);
139 149
	for (def h : header) data[h] = csvReader.get(h)
140 150
	
151
	println " creating teiHeader..."
141 152
	String xmlteiHeaderContent = mthm.getCustomizedTEiHeader(teiHeaderTemplateFile, data, xpathProperties);
142 153
	if (xmlteiHeaderContent != null && xmlteiHeaderContent.length() > 0) {
143
		mthm.injecting(txmFile, xmlteiHeaderContent)
154
		println " replacing teiHeader... "
155
		mthm.replaceHeader(txmFile, xmlteiHeaderContent)
144 156
	} else {
145
		println "** Text header not updated: $txmFile"
157
		println " ** Text header not updated: $txmFile"
146 158
	}
147
	
148
	println "CALL ComputeDemocratMeasureMetadataMacro"
149
	
150
	println "CALL URS Export"
159
		
160
	println " URS Export"
151 161
	Corpus analecCorpus = URSCorpora.getCorpus(corpus);
152 162
	Vue vue = URSCorpora.getVue(corpus);
153 163
	if (!ExportTEICorpus.export(true, outputDirectory, true, null, corpus, analecCorpus, vue)) {
154
		println "FAIL TO EXPORT CORPUS$corpus"
164
		println " FAIL TO EXPORT CORPUS $corpus"
165
	} else {
166
//		println " Renaming URS file to text name..."
167
//		File ursFile = new File(outputDirectory, corpus.getID()+".urs")
168
//		File textURSFile = new File(outputDirectory, text_id+".urs")
169
//		ursFile.renameTo(textURSFile)
170
		println " Done."
155 171
	}
172
	
173
	
156 174
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/democrat/Metadata2TeiHeader.groovy (revision 2029)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.urs.democrat
3

  
4
import java.nio.charset.Charset
5

  
6
import org.kohsuke.args4j.*
7
import groovy.transform.Field
8
import net.sf.saxon.functions.IndexOf
9

  
10
import org.txm.importer.StaxIdentityParser
11
import org.txm.objects.*
12
import org.txm.rcp.swt.widget.parameters.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.utils.CsvReader
15
import org.txm.utils.io.IOUtils
16
import org.txm.scripts.importer.XPathResult
17
import javax.xml.xpath.XPathConstants
18

  
19
class Metadata2TEiHeader {
20
	
21
	/** Verbosity level; the methods of this class print more details as it grows. */
	int debug = 0;

	/**
	 * Creates a header builder with the given verbosity.
	 *
	 * @param debug the verbosity level tested by the methods of this object before printing details
	 */
	public Metadata2TEiHeader(int debug) {
		this.debug = debug
	}
25
	//@Field @Option(name="teiHeaderTemplateFile", usage="the default teiHeader of texts", widget="FileOpen", required=true, def="teiHeaderTemplateFile.xml")
26
	//def teiHeaderTemplateFile
27
	//
28
	//@Field @Option(name="xpathFile", usage="properties file to redirect metadata column to the teiHeader locations", widget="FileOpen", required=true, def="xpathFile.properties")
29
	//def xpathFile
30
	//
31
	//@Field @Option(name="metadataFile", usage="the TSV file containing the metadata values per text", widget="FileOpen", required=true, def="metadataFile.tsv")
32
	//def metadataFile
33
	//
34
	//@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
35
	//debug
36
	//
37
	//if (!ParametersDialog.open(this)) return;
38
	//if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
39
	//
40
	//def xpathProperties = new Properties()
41
	//xpathProperties.load(IOUtils.getReader(xpathFile))
42
	//println xpathProperties
43
	//
44
	//def csvReader = new CsvReader(metadataFile.getAbsolutePath(), "\t".charAt(0), Charset.forName("UTF-8"))
45
	//println csvReader
46
	//
47
	//csvReader.readHeaders();
48
	//def header = csvReader.getHeaders()
49
	//if (!header.contains("id")) {
50
	//	println "** 'id' column not found in $metadataFile header=$header"
51
	//	return;
52
	//}
53
	//if (!header.contains("corpus_id")) {
54
	//	println "** 'corpus_id' column not found in $metadataFile header=$header"
55
	//	return;
56
	//}
57
	//
58
	//def corpora = [:]
59
	//for (Project project : Workspace.getInstance().getProjects()) {
60
	//	for (MainCorpus corpus : project.getChildren(MainCorpus.class)) {
61
	//		corpora[corpus.getID()] = corpus;
62
	//	}
63
	//}
64
	//
65
	//while (csvReader.readRecord())	{
66
	//	String text_id = csvReader.get("id")
67
	//	if (text_id == null || text_id.length() == 0) {
68
	//		//println "** @id not found for record="+csvReader.getRawRecord()
69
	//		continue;
70
	//	}
71
	//
72
	//	String corpus_id = csvReader.get("corpus_id")
73
	//	if (corpus_id == null || corpus_id.length() == 0) {
74
	//		println "** @corpus_id not found for record="+csvReader.getRawRecord()
75
	//		continue;
76
	//	}
77
	//
78
	//	MainCorpus corpus = corpora[corpus_id]
79
	//	if (corpus == null) {
80
	//		println "** no corpus found for ID=$corpus_id"
81
	//		continue;
82
	//	}
83
	//	def project = corpus.getProject()
84
	//
85
	//	def txmDir = new File(project.getProjectDirectory(), "txm/"+corpus.getID())
86
	//
87
	//	if (!txmDir.exists()) {
88
	//		println "** the selected corpus has no XML-TXM files. Aborting."
89
	//	}
90
	//
91
	//
92
	//	File txmFile = new File(txmDir, text_id+".xml")
93
	//	if (txmFile.exists()) {
94
	//
95
	//		def data = [:]
96
	//
97
	//		for (def h : header) data[h] = csvReader.get(h)
98
	//
99
	//		String xmlteiHeaderContent = getCustomizedTEiHeader(teiHeaderTemplateFile, data, xpathProperties);
100
	//		if (xmlteiHeaderContent != null && xmlteiHeaderContent.length() > 0) {
101
	//			injecting(txmFile, xmlteiHeaderContent)
102
	//		} else {
103
	//			println "** Text header not updated: $txmFile"
104
	//		}
105
	//	} else {
106
	//		println "** Text not found: $txmFile"
107
	//	}
108
	//}
109

  
110
	/**
	 * Builds the teiHeader of one text from a template file and the metadata values of the text.
	 *
	 * For each key of data that has an entry in xpathProperties, the XPath is evaluated on the
	 * template document and the first matching node receives the value: an element gets the
	 * value as its only text content, an attribute or a text node gets the value as node value.
	 *
	 * When the XPath matches nothing, the method tries to create the missing node: the XPath
	 * without its last step (and without a final "/@attribute" step) must select an existing
	 * element, under which a new element is appended. The new element is named after the last
	 * step without prefix, gets the attributes written as @name="value" in the predicate of
	 * that step, and receives the value either in the final attribute or as text content.
	 *
	 * @param teiHeaderTemplateFile the XML file containing the teiHeader template
	 * @param data map of metadata name -> value
	 * @param xpathProperties map of metadata name -> XPath of the node to patch in the template
	 * @return the serialized teiHeader element without XML declaration, or null if the template has no teiHeader element
	 */
	def getCustomizedTEiHeader(File teiHeaderTemplateFile, def data, Properties xpathProperties) {
		XPathResult xpathProcessor = new XPathResult(teiHeaderTemplateFile);
		for (String info : data.keySet()) {
			String xpath = xpathProperties[info];
			if (xpath == null) {
				continue; // not a data to inject
			}
			String value = data[info]
			if (value == null) value = "" // never hand a null value to the DOM
			if (debug > 1) println "   injecting '$info'='$value' in '$xpath'"

			def expr = xpathProcessor.xpath.compile(xpath);
			def first = expr.evaluate(xpathProcessor.doc.getDocumentElement(), XPathConstants.NODE);
			if (first != null) {
				switch (first.getNodeType()) {
					case 1: // element
						if (debug > 1) println "   patching: $info with "+value
						// patch the element in place: its attributes and namespace are kept,
						// only its content is replaced with the value
						first.setTextContent(value)
						break;
					case 2: // attribute
						if (debug > 1) println "   patching attribute: $info with "+value
						first.setNodeValue(value)
						break;
					case 3: // text
						if (debug > 1) println "   patching text: $info with "+value
						first.setNodeValue(value)
						break;
					default:
						break
				}
				continue;
			}

			// nothing matched: try finding the parent and add a new node
			String attribute_xpath = null;
			int attributeStep = xpath.lastIndexOf("/@")
			if (attributeStep > 0) {
				attribute_xpath = xpath.substring(attributeStep+2)
				xpath = xpath.substring(0, attributeStep)
			}
			int lastSlash = xpath.lastIndexOf("/")
			String parent_xpath = lastSlash > 0 ? xpath.substring(0, lastSlash) : ""
			String element_xpath = xpath.substring(lastSlash+1)

			// split the last step into element name and predicate
			String nodename = element_xpath
			String predicate = ""
			int bracket = nodename.indexOf("[")
			if (bracket >= 0) {
				predicate = nodename.substring(bracket)
				nodename = nodename.substring(0, bracket)
			}
			nodename = nodename.substring(nodename.indexOf(":")+1) // drop the namespace prefix if any

			def parent = null
			// an empty parent path or a step that is not a plain name (*, .., text()...) cannot be created
			if (parent_xpath.length() > 0 && nodename ==~ /[A-Za-z_][\w.-]*/) {
				def parent_expr = xpathProcessor.xpath.compile(parent_xpath);
				parent = parent_expr.evaluate(xpathProcessor.doc.getDocumentElement(), XPathConstants.NODE);
			}

			if (parent != null && parent.getNodeType() == 1) {
				// the @name="value" tests of the predicate become attributes of the new element
				def attributes = [:]
				def attributeMatcher = predicate =~ /@([\w.:-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/
				while (attributeMatcher.find()) {
					String attributeValue = attributeMatcher.group(2) != null ? attributeMatcher.group(2) : attributeMatcher.group(3)
					attributes[attributeMatcher.group(1)] = attributeValue
				}

				// the new element lives in the namespace of its parent, so that the XPaths
				// evaluated later on the same document can select it
				def newChild = xpathProcessor.doc.createElementNS(parent.getNamespaceURI(), nodename)
				for (String attributeName : attributes.keySet()) {
					newChild.setAttribute(attributeName, attributes[attributeName])
				}

				if (attribute_xpath != null) {
					newChild.setAttribute(attribute_xpath, value)
				} else {
					newChild.appendChild(xpathProcessor.doc.createTextNode(value))
				}

				if (debug > 1) println "  creating node $parent_xpath / $nodename[$attributes] : $attribute_xpath = $value"
				parent.appendChild(newChild)
			} else {
				println " ** info=$info not found or created for "+xpathProperties[info]
			}
		}

		def header = xpathProcessor.doc.getDocumentElement().getElementsByTagName("teiHeader").item(0)
		if (header == null) {
			println " ** no teiHeader element found in $teiHeaderTemplateFile"
			return null
		}

		// the serialized form is expected to start with an XML declaration, which must not
		// be injected in the middle of the target file
		String content = ""+header
		String xmlDeclaration = '<?xml version="1.0" encoding="UTF-8"?>'
		if (content.startsWith(xmlDeclaration)) {
			content = content.substring(xmlDeclaration.length())
		}
		return content
	}
196

  
197
	/**
	 * Replaces the teiHeader element of an XML-TXM file with the given content.
	 *
	 * The file is copied through a StaxIdentityParser into a "tmp_" working file, the original
	 * teiHeader being swapped for xmlteiHeaderContent. The original file is then moved to a
	 * "copy_" backup and the working file takes its place. If that last step fails, the backup
	 * is moved back so that the original file is not lost.
	 *
	 * @param txmFile the XML-TXM file to edit
	 * @param xmlteiHeaderContent the serialized teiHeader element written in place of the existing one
	 * @param tmpDirectory directory receiving the "tmp_" and "copy_" files; defaults to the JVM temporary directory
	 * @return true if txmFile now contains the new header, false otherwise
	 */
	def replaceHeader(File txmFile, String xmlteiHeaderContent, File tmpDirectory = new File(System.getProperty("java.io.tmpdir"))) {
		println " editing: $txmFile..."
		StaxIdentityParser sparser = new StaxIdentityParser(txmFile) {
					// true while the parser is inside the original teiHeader: nothing is copied meanwhile
					boolean start = false;

					public void processStartElement() {
						if (localname == "teiHeader") {
							start = true;
							if (debug > 2) println "   replacing teiHeader"
							if (debug > 3) println "       with $xmlteiHeaderContent"
							output.write(xmlteiHeaderContent.getBytes(Charset.forName("UTF-8")));  // REPLACE CONTENT !
						}
						if (!start) {
							super.processStartElement();
						}
					}

					public void processEndElement() {
						if (!start) {
							super.processEndElement();
						}
						// tested after the copy so that the closing tag of the old header is dropped too
						if (localname == "teiHeader") {
							start = false;
							if (debug > 2) println "   replace done"
						}
					}

					public void processCharacters() {
						if (!start) {
							super.processCharacters();
						}
					}

					public void processComment() {
						if (!start) {
							super.processComment();
						}
					}
				}

		tmpDirectory.mkdirs()
		File outfile = new File(tmpDirectory, "tmp_"+txmFile.getName())
		File copyFile = new File(tmpDirectory, "copy_"+txmFile.getName())

		// Files.move also works when source and target are on different file systems,
		// which File.renameTo does not guarantee
		def move = { File from, File to ->
			try {
				java.nio.file.Files.move(from.toPath(), to.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
				return true
			} catch (IOException e) {
				println " ** could not move $from to $to: $e"
				return false
			}
		}

		if (!sparser.process(outfile)) {
			println " -> FAIL see $outfile"
			return false;
		}

		if (!move(txmFile, copyFile)) {
			println " -> FAIL could not make a copy of $txmFile in $copyFile"
			println " -> see result in $outfile"
			return false;
		}

		if (!move(outfile, txmFile)) {
			println " -> FAIL could not replace $txmFile"
			println " -> see result in $outfile"
			// put the original file back so that the corpus does not lose a text
			if (!move(copyFile, txmFile)) {
				println " ** could not restore $txmFile, the original content is in $copyFile"
			}
			return false;
		}

		println " -> SUCCESS see $txmFile"
		return true
	}
270
}
tmp/org.txm.libs.msoffice/src/org/txm/libs/msoffice/ReadExcel.java (revision 2029)
11 11

  
12 12
public class ReadExcel {
13 13

  
14
	/**
15
	 * 
16
	 * @param inputFile
17
	 * @param sheetName
18
	 * @return list of lines (line = list of cells)
19
	 */
14 20
	public static ArrayList<ArrayList<String>> toTable(File inputFile, String sheetName) {
15 21

  
16 22
		ArrayList<ArrayList<String>> data = new ArrayList<ArrayList<String>>();
tmp/org.txm.libs.office/src/org/txm/libs/office/ReadODS.java (revision 2029)
58 58
		return data;
59 59
	}
60 60
	
61
	/**
62
	 * 
63
	 * @param inputFile
64
	 * @param sheetname
65
	 * @return list of lines. line = list of cells
66
	 * @throws Exception
67
	 */
61 68
	public static ArrayList<ArrayList<String>> toTable(File inputFile, String sheetname) throws Exception {
62 69
		
63 70
		SpreadsheetDocument spreadsheet = SpreadsheetDocument.loadDocument(inputFile);

Formats disponibles : Unified diff