Révision 881

tmp/org.txm.specificities.core/.classpath (revision 881)
1 1
<?xml version="1.0" encoding="UTF-8"?>
2 2
<classpath>
3 3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins">
5
		<accessrules>
6
			<accessrule kind="accessible" pattern="**"/>
7
		</accessrules>
8
	</classpathentry>
5 9
	<classpathentry kind="src" path="src"/>
6 10
	<classpathentry kind="output" path="bin"/>
7 11
</classpath>
tmp/org.txm.specificities.core/META-INF/MANIFEST.MF (revision 881)
1 1
Manifest-Version: 1.0
2
Require-Bundle: org.txm.utils;bundle-version="1.0.0";visibility:=reexp
3
 ort,org.eclipse.osgi;bundle-version="3.10.2";visibility:=reexport,org
4
 .eclipse.core.runtime;bundle-version="3.10.0";visibility:=reexport,or
5
 g.txm.searchengine.cqp.core;bundle-version="1.1.0";visibility:=reexpo
6
 rt,org.txm.statsengine.r.core;visibility:=reexport,org.txm.lexicaltab
7
 le.core;bundle-version="1.0.0";visibility:=reexport,org.txm.statsengi
8
 ne.core;bundle-version="1.0.0";visibility:=reexport,org.txm.core;bund
9
 le-version="0.7.0";visibility:=reexport,org.txm.progression.core;bund
10
 le-version="1.0.0";visibility:=reexport,org.txm.chartsengine.core;bun
11
 dle-version="1.0.0";visibility:=reexport,org.txm.chartsengine.jfreech
12
 art.core;bundle-version="1.0.0";visibility:=reexport,org.txm.chartsen
13
 gine.r.core;bundle-version="1.0.0";visibility:=reexport
2
Require-Bundle: org.txm.lexicaltable.core;bundle-version="1.0.0";visibility:=reexport,
3
 org.txm.progression.core;bundle-version="1.0.0";visibility:=reexport
14 4
Export-Package: org.txm.functions.contrasts,
15 5
 org.txm.specificities.core.chartsengine.jfreechart,
16 6
 org.txm.specificities.core.chartsengine.r,
tmp/org.txm.specificities.feature/feature.xml (revision 881)
17 17
   </license>
18 18

  
19 19
   <requires>
20
      <import plugin="org.txm.utils" version="1.0.0" match="greaterOrEqual"/>
21
      <import plugin="org.eclipse.osgi" version="3.10.2" match="greaterOrEqual"/>
22
      <import plugin="org.eclipse.core.runtime" version="3.10.0" match="greaterOrEqual"/>
23
      <import plugin="org.txm.searchengine.cqp.core" version="1.1.0" match="greaterOrEqual"/>
24
      <import plugin="org.txm.statsengine.r.core"/>
25 20
      <import plugin="org.txm.lexicaltable.core" version="1.0.0" match="greaterOrEqual"/>
26
      <import plugin="org.txm.statsengine.core" version="1.0.0" match="greaterOrEqual"/>
27
      <import plugin="org.txm.core" version="0.7.0" match="greaterOrEqual"/>
28 21
      <import plugin="org.txm.progression.core" version="1.0.0" match="greaterOrEqual"/>
29
      <import plugin="org.txm.chartsengine.core" version="1.0.0" match="greaterOrEqual"/>
30
      <import plugin="org.txm.chartsengine.jfreechart.core" version="1.0.0" match="greaterOrEqual"/>
31
      <import plugin="org.txm.chartsengine.r.core" version="1.0.0" match="greaterOrEqual"/>
32
      <import plugin="org.eclipse.ui" version="3.106.1" match="greaterOrEqual"/>
33
      <import plugin="org.txm.index.core" version="1.0.0" match="greaterOrEqual"/>
34
      <import plugin="org.txm.statsengine.r.core" version="1.0.0" match="greaterOrEqual"/>
35
      <import plugin="org.txm.statsengine.r.rcp"/>
36 22
      <import plugin="org.txm.chartsengine.rcp"/>
37
      <import plugin="org.txm.rcp" version="0.7.8" match="greaterOrEqual"/>
38
      <import plugin="org.eclipse.core.expressions" version="3.4.600" match="greaterOrEqual"/>
39 23
      <import plugin="org.txm.lexicaltable.rcp"/>
40 24
   </requires>
41 25

  
tmp/org.txm.core/.settings/org.eclipse.jdt.groovy.core.prefs (revision 881)
1 1
eclipse.preferences.version=1
2
groovy.compiler.level=23
2
groovy.compiler.level=-1
3 3
groovy.script.filters=scripts/**/*.groovy,y,src/main/resources/**/*.groovy,y,src/test/resources/**/*.groovy,y
tmp/org.txm.core/src/java/org/txm/importer/NiceToXML.groovy (revision 881)
1
package org.txm.importer
2

  
3
import javax.xml.stream.XMLStreamException
4
import org.apache.tools.ant.types.resources.selectors.InstanceOf;
5
import groovy.xml.*
6

  
7
// Working directories of the conversion; every pass of this script reads one of them and writes another.
def root = new File("/home/mdecorde/xml/temoignagesnice/corpus Matrice - fichiers xmlisés/")
File srcdir = new File(root, "orig")
File outdir = new File(root, "tmp")
File okdir = new File(root, "ok")
File ok2dir = new File(root, "ok2")
File temoignagedir = new File(root, "temoignages")

// always start from an empty "ok2" result directory
ok2dir.deleteDir()
ok2dir.mkdir()

// Active pass: rename every body/chapter/title element to "head" and write the result into "ok2".
for (def xmlFile : outdir.listFiles()) {
	String fileName = xmlFile.getName()
	if (fileName.endsWith(".xml")) {
		def document = new XmlParser().parse(xmlFile)

		def titles = document.body.chapter.title
		for (def title : titles) {
			println title
			title.name = "head"
		}

		File target = new File(ok2dir, fileName)
		target.withWriter("UTF-8") { out ->
			new XmlNodePrinter(new PrintWriter(out)).print(document)
		}
	}
}
30

  
31
/*
32
// ADD chapter@title
33
for (def file : outdir.listFiles()) {
34
	if (!file.getName().endsWith(".xml")) continue;
35
	def doc = new XmlParser().parse(file);
36
	
37
	for (def chapter : doc.body.chapter) {
38
		for (def title : chapter.title) {
39
			chapter.@title = title.text()
40
			break;
41
		}
42
	}
43
	
44
	new File(ok2dir, file.getName()).withWriter("UTF-8") { writer ->
45
		new XmlNodePrinter(new PrintWriter(writer)).print(doc)
46
	}
47
}
48
*/
49
//FIX figure and caption inclusions
50
/*
51
for (def file : outdir.listFiles()) {
52
	if (!file.getName().endsWith(".xml")) continue;
53
	def doc = new XmlParser().parse(file);
54
	for (def note : doc.body."**".figure) {
55
		note.name = "note"
56
		//println "fig : $note"
57
		//if ("Image :" == note.text()) {
58
			def children = note.parent().children()
59
			int i = children.indexOf(note)
60
			//println i + " < "+children.size()
61
			def nextChild = children[i+1]
62
			if (nextChild != null && nextChild.name().toString() == "caption") {
63
				println nextChild
64

  
65
				note.value = "Images : "+note.text()// + " "+nextChild.text()
66
				//println note
67

  
68
				children.remove(i+1)
69
				note.append(nextChild)
70
			}
71
		//}
72
	}
73

  
74
	new File(ok2dir, file.getName()).withWriter("UTF-8") { writer ->
75
		new XmlNodePrinter(new PrintWriter(writer)).print(doc)
76
	}
77
}
78
*/
79
/*
80
// DOCBOOK -> DOCBOOK TEXT ONLY
81
for (def file : outdir.listFiles()) {
82
	def doc = new XmlParser().parse(file);
83

  
84
	def body = null
85
	def bookinfo = null
86
	def preface = null
87
	for (def e : doc.body) body = e
88

  
89
	for (def e : doc.bookinfo) {
90
		doc.remove(e)
91
	}
92
	for (def e : doc.preface) {
93
		doc.remove(e)
94
	}
95
	for (def e : doc.appendix) {
96
		doc.remove(e)
97
	}
98
	for (def e : doc.chapter) {
99
		doc.remove(e)
100
	}
101

  
102
	if (body == null) {
103
		println "error text: "+file
104
		continue
105
	}
106

  
107
	new File(ok2dir, file.getName()).withWriter("UTF-8") { writer ->
108
		new XmlNodePrinter(new PrintWriter(writer)).print(doc)
109
	}
110
	//		writer.print XmlUtil.serialize(new StreamingMarkupBuilder().bind {
111
	//			mkp.yield body
112
	//		  })
113
}
114
*/
115
//DOCBOOK to TEI
116
/*
117
 for (def file : outdir.listFiles()) {
118
 def doc = new XmlParser().parse(file);
119
 def body = null
120
 //def bookinfo = null
121
 for (def e : doc.body) body = e
122
 //for (def e : doc.bookinfo) bookinfo = e
123
 //println body.getClass()
124
 if (body == null) {
125
 println "error text: "+file
126
 continue
127
 }
128
 //	bookinfo.name = "teiHeader"
129
 body.name = "text"
130
 def teins = new groovy.xml.Namespace("http://www.tei-c.org/ns/1.0",'tei')
131
 //	for (def node : body."**") {
132
 //		if (node instanceof String) continue
133
 //		def name = node.name()
134
 //		if (name instanceof String)
135
 //			node.name = teins.get(name)
136
 //		else 
137
 //			node.name = teins.get(name.getLocalPart())
138
 //	}
139
 for (def figure : body."**".figure) {
140
 figure.name = "note"
141
 figure.value = "Image : " + figure.caption.text()
142
 }
143
 for (def chapter : body."**".chapter) {
144
 chapter.name = "div"
145
 chapter.@type = "chapter"
146
 }
147
 for (def caption : body."**".title) {
148
 caption.name = "head"
149
 }
150
 for (def para : body."**".para) {
151
 para.name = "p"
152
 }
153
 def newdoc = new Node(null, "TEI");
154
 newdoc.@xmlns="http://www.tei-c.org/ns/1.0";
155
 newdoc.append(new Node(null, "teiHeader"))
156
 newdoc.append(body)
157
 new File(okdir, file.getName()).withWriter("UTF-8") { writer ->
158
 new XmlNodePrinter(new PrintWriter(writer)).print(newdoc)
159
 }
160
 //		writer.print XmlUtil.serialize(new StreamingMarkupBuilder().bind {
161
 //			mkp.yield body
162
 //		  })
163
 }
164
 */
165

  
166
// remove TEI
167
/*outdir.deleteDir()
168
 outdir.mkdir()
169
 def errors = []
170
 for (def file : srcdir.listFiles()) {
171
 if (file.isDirectory()) continue;
172
 //new EncodingConverter(file, "Windows-1252", "UTF-8")
173
 File outfile = new File(outdir, file.getName());
174
 outfile.withWriter("UTF-8") { writer ->
175
 file.eachLine("UTF-8") { line ->
176
 if (line.trim() == "<TEI>") {
177
 } else if (line.trim() == "</TEI>") {
178
 writer.println("</book>")
179
 } else if (line.trim() == "<book lang=\"fr\"/>") {
180
 writer.println("<book lang=\"fr\">")
181
 } else {
182
 writer.println(line)
183
 }
184
 }		
185
 }
186
 try {
187
 ValidateXml.testAndThrow(outfile);
188
 } catch (XMLStreamException e) {
189
 println file.getName() + " : "+ e.getMessage()
190
 errors << file
191
 if (e.getMessage().contains('Message: The element type "TEI" must be terminated by the matching end-tag "</TEI>"')) {
192
 println "Delete line : "+e.location.lineNumber
193
 }
194
 println ""
195
 }
196
 }
197
 */
198
println "done"
199
//if (errors.size() > 0)
200
//	println ""+errors.size()+" errors : $errors"
201
//String content = file.getText("Windows-1252")
202
//println content
tmp/org.txm.core/src/java/org/txm/importer/WExtractWithMode.groovy (revision 881)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2011-10-19 17:50:26 +0200 (mer., 19 oct. 2011) $
25
// $LastChangedRevision: 2038 $
26
// $LastChangedBy: alavrentev $ 
27
//
28
package org.txm.importer
29

  
30
import javax.xml.parsers.DocumentBuilder;
31
import javax.xml.parsers.DocumentBuilderFactory;
32
import javax.xml.parsers.ParserConfigurationException;
33
import javax.xml.transform.OutputKeys;
34
import javax.xml.transform.Result;
35
import javax.xml.transform.Source;
36
import javax.xml.transform.Transformer;
37
import javax.xml.transform.TransformerFactory;
38
import javax.xml.transform.dom.DOMSource;
39
import javax.xml.transform.stream.StreamResult;
40

  
41
import org.w3c.dom.Document;
42
import org.w3c.dom.Element;
43
import org.w3c.dom.NodeList;
44
import org.xml.sax.SAXException;
45

  
46
import javax.xml.stream.*;
47
import java.io.File;
48
import java.net.URL;
49

  
50
// TODO: Auto-generated Javadoc
51
/**
 * Extracts a sample of the w (word) elements of a TEI file.
 *
 * The input is copied to the output with a StAX reader/writer pair; the w
 * elements found after the opening "text" element are only copied when their
 * rank falls inside one of up to three windows (start, middle, end of the
 * text). Marked "not finished" by its author: elements nested inside a
 * skipped w element are still written.
 *
 * @author mdecorde
 */
class WExtractWithMode
{
	/**
	 * Copies infile to outfile keeping only the words selected by modemax.
	 *
	 * modemax has the form "mode/max" where max is the number of words to keep and mode is:
	 * "3" (three windows of max/3 words: start, middle and end), "2" (two windows of
	 * max/2 words: start and end), "1a" (max words at the start), "1m" (max words in
	 * the middle) or "1z" (max words at the end).
	 *
	 * Nothing is written when modemax is malformed, when the mode is unknown or when
	 * the file contains fewer than max words; a message is printed instead.
	 *
	 * NOTE(review): window bounds are inclusive and compared with a word rank starting
	 * at 1, so the middle window holds one word more than the others and the end window
	 * stops one word before the last one. Kept as is; confirm whether this is intended.
	 *
	 * @param infile the TEI file to read
	 * @param outfile the file to write
	 * @param modemax the extraction mode and the number of words to keep, "mode/max"
	 * @return always null
	 */
	public process(File infile, File outfile, String modemax)
	{
		println "Process "+infile.getName()+", keep $modemax words"

		int max = 0
		String mode = ""
		try {
			String[] modeAndMax = modemax.split("/")
			mode = modeAndMax[0]
			max = Integer.parseInt(modeAndMax[1])
		} catch (Exception e) {
			// a half-parsed value used to be processed with max = 0; report it instead
			println "malformed mode/max value '$modemax' for "+infile.getName()+", expected mode/max : "+e
			return;
		}

		int count = this.countW(infile);
		if (count < max)
		{
			println "can't extract $max words, the file "+infile.getName()+" contains only $count words"
			return;
		}

		// size of one window
		int part = 0;
		switch (mode)
		{
			case "3":
				part = max/3
				break
			case "2":
				part = max/2
				break
			case "1a":
			case "1m":
			case "1z":
				part = max
				break
			default:
				println "mode must be 1a, 1m, 1z, 2 or 3"
				return
		}

		// start window: modes 3, 2 and 1a
		int from1 = 0
		int to1 = 0
		if (mode != "1m" && mode != "1z")
		{
			to1 = part
		}
		// middle window: modes 3 and 1m
		int from2 = 0
		int to2 = 0
		if (mode == "3" || mode == "1m")
		{
			from2 = (count/2) - (part/2);
			to2 = (count/2) + (part/2);
		}
		// end window: modes 3, 2 and 1z
		int from3 = 0
		int to3 = 0
		if (mode != "1a" && mode != "1m")
		{
			from3 = count - part;
			to3 = count - 1;
		}

		println " count : "+count
		println "  get from "+from1+" to "+to1
		println "  get from "+from2+" to "+to2
		println "  get from "+from3+" to "+to3

		int[][] windows = [[from1, to1], [from2, to2], [from3, to3]] as int[][]

		// withInputStream/withOutputStream close both streams even when the copy fails
		infile.withInputStream { inputData ->
			outfile.withOutputStream { output ->
				XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);
				XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(output, "UTF-8");
				try {
					writer.writeStartDocument("utf-8", "1.0");
					copySelectedWords(parser, writer, windows);
					writer.flush();
				} finally {
					writer.close();
					parser.close();
				}
			}
		}
		return null;
	}

	/**
	 * Copies the events of parser to writer, skipping the w elements (start tag,
	 * text and end tag) whose rank is outside every window once the "text"
	 * element has been opened.
	 *
	 * @param parser the reader positioned at the start of the document
	 * @param writer the writer receiving the copy
	 * @param windows inclusive [from, to] rank bounds of the words to keep
	 */
	private void copySelectedWords(XMLStreamReader parser, XMLStreamWriter writer, int[][] windows)
	{
		boolean isW = false;    // set at the first w element and never reset
		boolean isText = false; // true once the "text" element has been opened
		boolean printW = true;
		int wcount = 0;         // rank of the last w element read, the first one is 1
		String localname;

		for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
		{
			printW = !isText || isKept(wcount, windows);

			switch (event)
			{
				case XMLStreamConstants.START_ELEMENT:
					localname = parser.getLocalName();

					if (localname == "text")
						isText = true;

					if (localname == "w")
					{
						isW = true;
						wcount++;
						// the decision must use the rank of the word being opened
						printW = !isText || isKept(wcount, windows);

						if (printW)
						{
							writer.writeStartElement(qualifiedName(parser.getPrefix(), localname));
							copyAttributes(parser, writer);
						}
					}
					else
					{
						writer.writeStartElement(qualifiedName(parser.getPrefix(), localname));

						if (localname == "teiHeader")
						{
							writer.writeAttribute("xmlns:me", "http://www.menota.org/ns/1.0");
							writer.writeAttribute("xmlns:bfm", "http://bfm.ens-lsh.fr/ns/1.0");
						}

						if (localname == "TEI")
						{
							writer.writeAttribute("xmlns","http://www.tei-c.org/ns/1.0");
						}

						copyAttributes(parser, writer);
					}
					break;

				case XMLStreamConstants.END_ELEMENT:
					// every end tag is copied except the one of a skipped word
					if (parser.getLocalName() != "w" || printW)
					{
						writer.writeEndElement();
						writer.writeCharacters("\n");
					}
					break;

				case XMLStreamConstants.CHARACTERS:
					if (!isW || printW)
						writer.writeCharacters(parser.getText().trim());
					break;
			}
		}
	}

	/**
	 * Tells if a word rank falls inside one of the windows.
	 *
	 * @param rank the rank of the word
	 * @param windows inclusive [from, to] bounds
	 * @return true if the word must be kept
	 */
	private static boolean isKept(int rank, int[][] windows)
	{
		for (int[] window : windows)
		{
			if (rank >= window[0] && rank <= window[1])
				return true;
		}
		return false;
	}

	/**
	 * Builds the name to write for an element or an attribute.
	 *
	 * @param prefix the namespace prefix, may be null or empty
	 * @param localname the local name
	 * @return "prefix:localname", or localname alone when there is no prefix
	 */
	private static String qualifiedName(String prefix, String localname)
	{
		// a null prefix must not end up as "null:name"
		if (prefix == null || prefix.length() == 0)
			return localname;
		return prefix+":"+localname;
	}

	/**
	 * Writes all the attributes of the current start element of parser to writer.
	 *
	 * @param parser the reader positioned on a start element
	 * @param writer the writer whose current element receives the attributes
	 */
	private static void copyAttributes(XMLStreamReader parser, XMLStreamWriter writer)
	{
		for (int i = 0 ; i < parser.getAttributeCount() ; i++)
		{
			writer.writeAttribute(
					qualifiedName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)),
					parser.getAttributeValue(i));
		}
	}

	/**
	 * Counts the w elements of a file.
	 *
	 * @param infile the XML file to read
	 * @return the number of start tags whose local name is "w"
	 */
	public int countW(File infile)
	{
		int count = 0;
		infile.withInputStream { inputData ->
			XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputData);
			try {
				for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
				{
					if (event == XMLStreamConstants.START_ELEMENT && parser.getLocalName() == "w")
						count++;
				}
			} finally {
				parser.close();
			}
		}
		return count;
	}

	/**
	 * Processes the files of ~/xml/extract listed in ~/xml/extract/maxfilemode
	 * and writes the results in ~/xml/extract/results.
	 *
	 * maxfilemode contains one line per file, with 3 tab separated columns:
	 * file name, mode and number of words, for example:
	 *
	 * filename1.xml	3	45000
	 * filename2.xml	1a	15000
	 * filename3.xml	1m	15000
	 * filename4.xml	1z	15000
	 * filename5.xml	2	22500
	 *
	 * @param args not used
	 */
	public static void main(String[] args)
	{
		String userDir = System.getProperty("user.home");

		File directory = new File(userDir+"/xml/extract/");
		File outdir = new File(userDir+"/xml/extract/","results");
		outdir.mkdir();

		File maxfilemode = new File(userDir+"/xml/extract/maxfilemode");
		if (!maxfilemode.isFile())
		{
			println "can't read the settings file "+maxfilemode
			return;
		}

		// file name -> "mode/max"
		Map<String, String> maxperfile = new HashMap<String, String>();
		maxfilemode.eachLine { line ->
			String[] split = line.split("\t");
			// lines which do not have exactly 3 columns are ignored
			if (split.length == 3)
				maxperfile.put(split[0], split[1]+"/"+split[2]);
		}
		println maxperfile;

		for (File infile : directory.listFiles())
		{
			String modemax = maxperfile.get(infile.getName());
			if (modemax != null)
			{
				File outfile = new File(outdir, infile.getName());
				new WExtractWithMode().process(infile, outfile, modemax)
			}
		}
	}
}
tmp/org.txm.core/src/java/org/txm/importer/HTML2XHTML.groovy (revision 881)
1
package org.txm.importer
2

  
3
import org.txm.utils.CharsetDetector;
4
import org.txm.importer.ValidateXml;
5

  
6
// HTML document to convert and the XHTML file derived from it
File infile = new File("/home/mdecorde/Bureau/matrice/témoignages/CONVERSIONS/jod/odt.html")
File outfile = new File("/home/mdecorde/Bureau/matrice/témoignages/CONVERSIONS/jod/odt.xml")

// the input is decoded (and the output encoded) with the charset found by CharsetDetector
String encoding = new CharsetDetector(infile).getEncoding()
println "Encoding: $encoding"
String text = infile.getText(encoding)

// quote the bare values of upper-case attributes (TRUC=sdf234), then lower-case the whole tag
def quoteThenLowerCase = { full, word ->
	String quoted = word.replaceAll("([A-Z]+=)([^\" >]+)([ >])", '$1"$2"$3')
	return quoted.toLowerCase()
}
text = text.replaceAll(/(<[^!][^>]*>)/, quoteThenLowerCase)

// lower-case the one-character tags
def lowerCase = { full, word -> word.toLowerCase() }
text = text.replaceAll(/(<.>)/, lowerCase)

// plain substitutions, applied in order: resolve the nbsp entity, then close the br, meta and img tags
def substitutions = [
	[/&nbsp;/, " "],
	[/<br>/, "<br/>"],
	[/<meta([^>]*)>/, '<meta$1/>'],
	[/<img([^>]*)>/, '<img$1/>']
]
for (def substitution : substitutions) {
	text = text.replaceAll(substitution[0], substitution[1])
}

// drop the HTML 4.0 doctype declaration
text = text.replace('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">', '')

// write the result, then report the file if it is not valid XML
outfile.withWriter(encoding) { out -> out.write(text) }
boolean valid = ValidateXml.test(outfile)
if (!valid) {
	println "FILE: $outfile"
}
tmp/org.txm.core/src/java/org/txm/importer/XMLText2TXTCSV.groovy (revision 881)
1
package org.txm.importer
2

  
3
// Splits each XML file of "dir" into a plain text file (the content without its
// <text> tags) and one row of metadata.csv built from the attributes of the
// <text id=... loc=... annee=...> tag.
File dir = new File("/home/mdecorde/xml/voeux/split_xml")
File outdir = new File("/home/mdecorde/xml/voeux/split_txtcsv")
println "1) xml -> txt + write metadata.csv"
outdir.deleteDir()
outdir.mkdir()
File metadatafile = new File(outdir, "metadata.csv")
StringBuilder csvString = new StringBuilder()

def files = dir.listFiles()
if (files == null) {
	// listFiles() returns null when "dir" is missing or is not a directory
	println "can't list the files of $dir"
	files = new File[0]
}
files.sort()
for (File f : files)
{
	// getText() fails on directories
	if (!f.isFile()) continue;

	File outfile = new File(outdir, f.getName()+".txt");
	String text = f.getText("UTF-8");
	String texttag = text.find("<text id.*>")
	text = text.replaceAll("<text.*>", "").replace("</text>", "");
	outfile.withWriter("UTF-8"){writer -> writer.write(text) }

	if (texttag == null) {
		// no metadata can be read from this file: its text is written but it gets no CSV row
		println "no <text id=...> tag found in $f, no metadata written for this file"
		continue;
	}
	// <text id="a" loc="b" annee="c"> becomes "a","b","c"
	csvString.append(texttag.replace("<text id=","").replace(" loc=", ",").replace(" annee=", ",").replace("\">", "\"")).append("\n")
}

println "2) write metadata.csv"
metadatafile.withWriter("UTF-8"){csvwriter ->
	csvwriter.write("\"id\",\"loc\",\"annee\"\n");
	csvwriter.write(csvString.toString())}

println "3) rename Voeux_*"
// keeps the characters 6 to 9 of the name (what follows "Voeux_") as the new file name
outdir.eachFileMatch(~/Voeux_.*/) {file-> file.renameTo(new File(outdir, file.getName().substring(6, 10)+".txt")) }
tmp/org.txm.core/src/java/org/txm/importer/xmltxm/BuildTTSrc.groovy (revision 881)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $
25
// $LastChangedRevision: 3400 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.xmltxm
29

  
30
import java.text.DateFormat;
31
import java.util.Date;
32
import java.util.ArrayList;
33
import javax.xml.stream.*;
34
import java.net.URL;
35

  
36
import org.txm.Toolbox;
37
import org.txm.importer.filters.*;
38

  
39
// TODO: Auto-generated Javadoc
40
/**
 * Builds the "TT source" file of an XML-TXM file: one line per form element,
 * plus the s (sentence) tags.
 *
 * NOTE(review): the original comment says the file is built "for tigerSearch",
 * while the code refers to TreeTagger (s tags, TREETAGGER_APOSTROPHE preference);
 * confirm the intended consumer.
 *
 * @author mdecorde
 */
public class BuildTTSrc {

	/** The url of the file to process. */
	private def url;

	/** The stream opened on the url. */
	private def inputData;

	/** The StAX input factory. */
	private def factory;

	/** The parser reading inputData, null if the constructor failed. */
	private XMLStreamReader parser;

	/** The writer of the result file, created by createOutput. */
	private BufferedWriter output;

	/**
	 * Opens the file to process, uses XML-TXM V2.
	 *
	 * Errors are printed and not thrown: process() returns false if the file could not be opened.
	 *
	 * @param url the url of the file to process
	 */
	public BuildTTSrc(URL url) {
		try {
			this.url = url;
			inputData = url.openStream();
			factory = XMLInputFactory.newInstance();
			parser = factory.createXMLStreamReader(inputData);
		} catch (XMLStreamException ex) {
			System.out.println(ex);
		} catch (IOException ex) {
			System.out.println("IOException while parsing " + url + " : " + ex);
		}
	}

	/**
	 * Creates the UTF-8 writer of the result file.
	 *
	 * @param outfile the outfile
	 * @return true, if successful
	 */
	private boolean createOutput(File outfile) {
		try {
			output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile),
					"UTF-8"));
			return true;
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
			return false;
		}
	}

	/**
	 * Writes one line per form element of the source: the trimmed content of the
	 * form without line breaks and with "<" replaced by "&lt;", or __EMPTY__ if the
	 * form has no content. The start and end tags of the s elements are written on
	 * their own line. The typographic apostrophes are replaced by ' when the
	 * TREETAGGER_APOSTROPHE preference is "false".
	 *
	 * The parser is consumed and closed: a BuildTTSrc can only be processed once.
	 *
	 * @param outfile the outfile
	 * @param formtype meant to choose the form to use when a word has several forms;
	 *        currently ignored, every form element is written
	 * @return true, if successful
	 */
	public boolean process(File outfile, String formtype = null) {
		if (parser == null) {
			// the constructor failed: do not create an empty result file
			System.out.println("BuildTTSrc: no parser available for " + url);
			return false;
		}
		if (!createOutput(outfile))
			return false;

		boolean flagform = false; // to catch the content of the form tag
		StringBuilder form = new StringBuilder(); // the content of the form tag
		StringBuilder buffer = new StringBuilder(); // the content of the result file
		try {
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
				switch (event) {
					case XMLStreamConstants.START_ELEMENT:
						switch (parser.getLocalName()) {
							case "form":
								flagform = true;
								form.setLength(0);
								break;
							case "s": // TreeTagger can use s tags
								buffer.append("<s>\n");
								break;
						}
						break;

					case XMLStreamConstants.END_ELEMENT:
						switch (parser.getLocalName()) {
							case "form":
								flagform = false;
								String token = form.toString().trim();
								if (token.length() == 0) buffer.append("__EMPTY__\n");
								else buffer.append(token.replace("\n", "").replace("<", "&lt;")+ "\n");
								break;
							case "s":
								buffer.append("</s>\n");
								break;
						}
						break;

					case XMLStreamConstants.CHARACTERS:
						if (flagform)
							form.append(parser.getText());
						break;
				}
			}

			String str = buffer.toString()
			if ("false".equals(Toolbox.getPreference(Toolbox.TREETAGGER_APOSTROPHE))) {
				str = str.replace("’", "'").replace("‘", "'");
			}
			output.write(str)
			// closed here so that a failing flush makes process() return false
			output.close();
			return true;
		} catch (Exception ex) {
			System.out.println(ex);
			return false;
		} finally {
			closeQuietly();
		}
	}

	/**
	 * Releases the writer, the parser and the input stream; errors are ignored
	 * since the result of process() is already known when this is called.
	 */
	private void closeQuietly() {
		try { if (output != null) output.close(); } catch (Exception ignored) { }
		try { if (parser != null) parser.close(); } catch (Exception ignored) { }
		try { if (inputData != null) inputData.close(); } catch (Exception ignored) { }
	}

	/**
	 * Builds the TT source of ~/xml/rgaqcj/anainline/roland-p5.xml
	 * in ~/xml/rgaqcj/ttsrc/roland-p5.tt.
	 *
	 * @param args not used
	 */
	public static void main(String[] args) {

		// "~" is not expanded by the JVM, the home directory must be resolved explicitly
		String rootDir = System.getProperty("user.home") + "/xml/rgaqcj/";

		File srcfile = new File(rootDir + "anainline/", "roland-p5.xml");
		File resultfile = new File(rootDir + "ttsrc/", "roland-p5.tt");
		println("build ttsrc file : " + srcfile + " to : " + resultfile);

		def builder = new BuildTTSrc(srcfile.toURI().toURL());
		builder.process(resultfile, null);

		return;
	}

}
tmp/org.txm.core/src/java/org/txm/importer/xmltxm/Xml2Ana.groovy (revision 881)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
//
22
//
23
//
24
// $LastChangedDate: 2017-04-19 16:23:38 +0200 (mer. 19 avril 2017) $
25
// $LastChangedRevision: 3430 $
26
// $LastChangedBy: mdecorde $
27
//
28
package org.txm.importer.xmltxm
29

  
30
import org.txm.importer.HasElement;
31
import org.txm.importer.StaxIdentityParser;
32

  
33
import java.text.DateFormat;
34
import java.util.ArrayList;
35
import java.util.Date;
36
import java.util.HashMap;
37
import java.util.Locale;
38

  
39
import javax.xml.stream.*;
40

  
41
import java.net.URL;
42

  
43
import org.txm.importer.filters.*;
44
import org.txm.utils.AsciiUtils;
45

  
46
/**
47
 * The Class Xml2Ana.
48
 *
49
 * @author mdecorde
50
 * transform : pre xml-tei file >> xml-tei-txm file
51
 * The pre xml-tei file must contain a minimal teiHeader with classDecl, encodingDesc and titleStmt
52
 * 
53
 * you must specify the correspondence between word attributes and ana types & respStmt IDs
54
 * then the attributes of w tags will be transformed into interp tag
55
 */
56
public class Xml2Ana extends StaxIdentityParser
57
{
58
	/** The dir. */
59
	private def dir;
60

  
61
	/** The convert all attributes. */
62
	private boolean convertAllAttributes = false;
63

  
64
	/** The corresp type. */
65
	HashMap<String,String> correspType;
66

  
67
	/** The corresp ref. */
68
	HashMap<String,String> correspRef;
69

  
70
	/** The check tags. */
71
	HashMap<String,Boolean> checkTags = new HashMap<String,Boolean>();
72

  
73
	/** The resp id. */
74
	def respId = [];
75

  
76
	/** The applications. */
77
	HashMap<String,File> applications;
78

  
79
	/** The taxonomies. */
80
	HashMap<String,String[]> taxonomies;
81

  
82
	/** The resps. */
83
	HashMap<String,String[]> resps;
84

  
85
	/** The items. */
86
	HashMap<String,HashMap<String,String>> items;
87

  
88
	/** The XML headeradded. */
89
	boolean XMLHeaderadded = false;
90
	String textname;
91
	String wtag = "w";
92
	
93
	public static final String TEXT = "text"
94
	public static final String ID = "id"
95

  
96
	/**
	 * Creates a transformer for one source file.
	 *
	 * The text name is the file name without its last extension. It is later used
	 * as the id of the text element when that id has to be generated.
	 *
	 * @param file the source XML file to transform
	 */
	public Xml2Ana(File file) {
		super(file.toURI().toURL());

		// text name = file name without its last extension; a name whose only dot
		// is the first character is kept untouched
		String name = file.getName();
		int dot = name.lastIndexOf(".");
		textname = (dot > 0) ? name.substring(0, dot) : name;

		// header elements whose presence is recorded by processStartElement()
		for (String tag : ["respStmt", "titleStmt", "appInfo"]) {
			checkTags.put(tag, false);
		}

		// presumably tells whether the file already declares a text element -- confirm in HasElement
		hasText = new HasElement(file, TEXT).process();
	}
117

  
118
	/**
	 * Chooses what happens to word attributes that are neither mapped by the
	 * correspondence tables nor the id attribute: when true they are converted to
	 * txm:ana elements, when false they are copied unchanged on the word element.
	 *
	 * The misspelled method name is part of the public API and is kept as is.
	 *
	 * @param value true to convert the unmapped attributes
	 * @return no return type is declared; Groovy yields the value of the last expression
	 */
	public setConvertAllAtrtibutes(boolean value) {
		this.convertAllAttributes = value;
	}
127
	
128
	/**
	 * Sets the local name of the element that is processed as a word
	 * (the default is "w").
	 *
	 * @param wtag the local name of the word element
	 * @return no return type is declared; Groovy yields the value of the last expression
	 */
	public setWordTag(String wtag) {
		this.wtag = wtag;
	}
137

  
138
	int idcount = 0;
139
	boolean flagWord = false;
140
	int firstElement = 0;
141
	boolean teiElementAdded = false;
142
	boolean teiHeaderElementAdded = false;
143
	boolean hasText = false;
144
	boolean textElementAdded = false;
145
	def anabalises = [];
146
	/**
	 * Handles a start tag read by the parser.
	 *
	 * Word elements (local name equal to wtag) are rewritten: attributes mapped by
	 * correspType are queued in anabalises to be emitted by processEndElement(),
	 * the id attribute is normalized, and a txm:form child is opened to receive
	 * the word form.
	 * A TEI element is copied by the parent class, then completed with the TEI and
	 * TXM namespace declarations when the source does not declare them.
	 * For any other element, the TEI, teiHeader and text wrappers are generated
	 * where the checks below find them missing, then the tag is copied by the
	 * parent class.
	 */
	protected void processStartElement()
	{
		firstElement++;

		String tagName = parser.getLocalName();

		// remember that a tracked header element was met
		if (checkTags.containsKey(tagName)) {
			checkTags.put(tagName, true);
		}

		switch (tagName) {
			case wtag:
				// a word outside any text element: open the text element first
				if (!hasText) {
					openImplicitTextElement();
				}
				idcount++; // one more word
				anabalises.clear();

				writer.writeStartElement(tagName); // the word element itself

				// copy the namespace declarations
				for (int n = 0 ; n < parser.getNamespaceCount() ; n++) {
					writer.writeNamespace(parser.getNamespacePrefix(n), parser.getNamespaceURI(n));
				}

				// dispatch each attribute: annotation, id, or plain copy
				for (int a = 0 ; a < parser.getAttributeCount() ; a++) {
					String attName = parser.getAttributeLocalName(a);
					String attValue = parser.getAttributeValue(a);

					if (correspType.containsKey(attName)) {
						// mapped attribute: will become a txm:ana element
						String anaType = correspType.get(attName);
						String anaResp = correspRef.get(attName);
						anabalises.add(["#"+anaResp, "#"+anaType, attValue]);
					} else if (attName == ID) {
						// id attribute: "wXXX" becomes "w_XXX", ids already starting with "w_" are kept
						String wordid = attValue;
						if (wordid.startsWith("w") && !wordid.startsWith("w_")) {
							wordid = "w_"+wordid.substring(1);
						}
						// remove characters not compatible with the id attribute value
						wordid = AsciiUtils.buildAttributeId(wordid);
						writer.writeAttribute(attName, wordid);
					} else if (convertAllAttributes) {
						// unmapped attribute converted to an annotation without responsibility
						anabalises.add(["none","#"+attName, attValue]);
					} else {
						// unmapped attribute kept as it was on the original word
						writer.writeAttribute(attName, attValue);
					}
				}

				flagWord = true; // processCharacters() now captures the form
				writer.writeStartElement(TXMNS, "form");
				break;

			case "TEI":
				super.processStartElement();

				// find out which namespaces the source TEI element already declares
				boolean teiNsDeclared = false;
				boolean txmNsDeclared = false;
				for (int n = 0 ; n < parser.getNamespaceCount() ; n++) {
					String uri = parser.getNamespaceURI(n);
					if (uri == TXMNS) {
						txmNsDeclared = true;
					} else if (uri == TEINS) {
						teiNsDeclared = true;
					}
				}
				if (!teiNsDeclared) {
					writer.writeDefaultNamespace(TEINS);
				}
				if (!txmNsDeclared) {
					writer.writeNamespace(TXM, TXMNS);
				}
				break;

			default:
				if (TEXT.equals(localname)) {
					hasText = true;
				}

				if (firstElement == 1) { // is the first element TEI?
					if (localname != "TEI") { // "TEI" is missing
						teiElementAdded = true;
						addTEIElement();
					} else if (!hasText) {
						openImplicitTextElement();
					}
				}

				// evaluated after the block above, which may have set teiElementAdded
				boolean secondElementOfSourceTei = (firstElement == 2) && !teiElementAdded;
				if (secondElementOfSourceTei) {
					if (localname != "teiHeader") { // teiHeader is missing
						writeTeiHeader();
						hasTeiHeader = true;
						teiHeaderElementAdded = true;
					}
				} else if (!hasText && (teiElementAdded || teiHeaderElementAdded)) {
					openImplicitTextElement();
				}

				super.processStartElement();

				// a text element of the source without id receives the text name as id
				if (TEXT.equals(localname) && !parser.getAttributeValue(null, ID)) {
					writer.writeAttribute(ID, textname);
				}
		}
	}

	/**
	 * Opens a text element carrying the text name as id, and records that it was
	 * generated so that after() closes it.
	 */
	private void openImplicitTextElement()
	{
		writer.writeStartElement(TEXT);
		writer.writeAttribute(ID, textname);
		textElementAdded = true;
		hasText = true;
	}
262

  
263
	/**
	 * Closes the wrapper elements generated by this class (the text element
	 * first, then the TEI element), then lets the parent class finish.
	 */
	protected void after()
	{
		// count the generated wrappers still open; the writer closes the innermost one first
		int wrappersToClose = 0;
		if (textElementAdded) {
			wrappersToClose++; // text
		}
		if (teiElementAdded) {
			wrappersToClose++; // TEI
		}
		for (int k = 0 ; k < wrappersToClose ; k++) {
			writer.writeEndElement();
		}

		super.after(); // close writer, parser, etc
	}
273

  
274
	/**
	 * Opens a generated TEI root element, declares the TEI namespace (both as
	 * default namespace and under the TEI prefix) and the TXM namespace, then
	 * writes a generated teiHeader. The TEI element is left open.
	 */
	protected void addTEIElement()
	{
		writer.writeStartElement("TEI");

		// namespace declarations of the generated root
		writer.writeDefaultNamespace(TEINS);
		writer.writeNamespace(TXM, TXMNS);
		writer.writeNamespace(TEI, TEINS);

		this.writeTeiHeader();
	}
282

  
283
	/**
	 * Handles a text node: inside a word element the trimmed text is written as
	 * the word form, anywhere else the parent class handles it.
	 */
	protected void processCharacters()
	{
		if (!flagWord) {
			super.processCharacters();
			return;
		}

		// keep form in 1 line
		String form = parser.getText().trim();
		writer.writeCharacters(form);
	}
291
	
292
	boolean hasClassDecl = false;
293
	boolean hasFileDesc = false;
294
	boolean hasEncodingDesc = false;
295
	boolean hasTeiHeader = false;
296
	boolean hasTEI = false;
297
	public static String ANA = "ana"
298
	public static String RESP = "resp"
299
	public static String TYPE = "type"
300
	/**
	 * Handles an end tag read by the parser, then lets the parent class close it.
	 *
	 * Before the closing tag is written:
	 * a word element gets its txm:form closed and its queued txm:ana elements written;
	 * fileDesc receives the respStmt elements;
	 * classDecl receives the taxonomies;
	 * encodingDesc receives the appInfo (and classDecl if none was met);
	 * teiHeader receives a generated encodingDesc if none was met;
	 * TEI receives a generated teiHeader if none was met.
	 */
	protected void processEndElement()
	{
		String tagName = parser.getLocalName();

		switch (tagName) {
			case wtag:
				writer.writeEndElement(); // txm:form
				writeQueuedAnnotations();
				flagWord = false; // stop capturing the form
				break;

			case "fileDesc":
				hasFileDesc = true;
				writeTXMResps();
				break;

			case "classDecl":
				hasClassDecl = true;
				writeTXMTaxonomies();
				break;

			case "encodingDesc":
				hasEncodingDesc = true;
				writeContentOfEncodingDesc();
				break;

			case "teiHeader":
				hasTeiHeader = true;
				if (!hasEncodingDesc) {
					// no encodingDesc in the source header: generate one
					writer.writeStartElement("encodingDesc");
					writeContentOfEncodingDesc();
					writer.writeEndElement();
				}
				break;

			case "TEI":
				hasTEI = true;
				if (!hasTeiHeader) {
					writeTeiHeader();
				}
				break;
		}

		super.processEndElement();
	}

	/**
	 * Writes one txm:ana element per entry queued in anabalises, each entry
	 * being [resp, type, value]:
	 * &lt;txm:ana resp=ref type=corresptype&gt;value&lt;/txm:ana&gt;
	 */
	private void writeQueuedAnnotations()
	{
		for (def annotation : anabalises) {
			writer.writeStartElement(TXMNS, ANA);
			writer.writeAttribute(RESP, annotation[0]);
			writer.writeAttribute(TYPE, annotation[1]);
			writer.writeCharacters(annotation[2]);
			writer.writeEndElement(); // txm:ana
		}
	}
350

  
351
	/**
	 * Writes a complete generated teiHeader: a fileDesc holding the respStmt
	 * elements, an empty title in titleStmt, an empty publicationStmt and an empty
	 * sourceDesc, followed by an encodingDesc filled by writeContentOfEncodingDesc().
	 */
	protected void writeTeiHeader()
	{
		writer.writeStartElement("teiHeader");

		// fileDesc
		writer.writeStartElement("fileDesc");
		writeTXMResps();
		writer.writeStartElement("titleStmt");
		writer.writeStartElement("title");
		writer.writeEndElement(); // title
		writer.writeEndElement(); // titleStmt
		// publicationStmt and sourceDesc are written without content
		for (String emptyChild : ["publicationStmt", "sourceDesc"]) {
			writer.writeStartElement(emptyChild);
			writer.writeEndElement();
		}
		writer.writeEndElement(); // fileDesc

		// encodingDesc
		writer.writeStartElement("encodingDesc");
		this.writeContentOfEncodingDesc();
		writer.writeEndElement(); // encodingDesc

		writer.writeEndElement(); // teiHeader
	}
370

  
371
	/**
	 * Writes the children this class adds to an encodingDesc element: an appInfo
	 * element listing the applications and, when no classDecl element has been
	 * closed so far, a classDecl element listing the taxonomies.
	 */
	protected void writeContentOfEncodingDesc()
	{
		writer.writeStartElement("appInfo");
		writeTXMApps();
		writer.writeEndElement(); // appInfo

		if (hasClassDecl) {
			return; // the taxonomies were already written in the source classDecl
		}

		writer.writeStartElement("classDecl");
		writeTXMTaxonomies();
		writer.writeEndElement(); // classDecl
	}
382

  
383
	/**
	 * Reports which of the tracked header elements (the keys of checkTags) were
	 * met while parsing.
	 *
	 * Only the tags flagged true by processStartElement() are listed. The
	 * previous version listed every tracked tag whatever its flag, so the report
	 * was always the same.
	 *
	 * @return the string "found tags : " followed by one tab-indented line per found tag
	 */
	public String checkResp()
	{
		StringBuilder rez = new StringBuilder("found tags : \n");
		for (Map.Entry<String, Boolean> tracked : checkTags.entrySet()) {
			if (tracked.getValue()) { // flag set when the tag was met
				rez.append("\t").append(tracked.getKey()).append("\n");
			}
		}
		return rez.toString();
	}
395

  
396
	/**
	 * Sets the two tables used to convert word attributes into txm:ana elements.
	 * Both are looked up by attribute name in processStartElement().
	 *
	 * @param correspRef attribute name -> responsibility id, written (prefixed with "#") as the resp of the txm:ana
	 * @param correspType attribute name -> annotation type, written (prefixed with "#") as the type of the txm:ana
	 */
	public void setCorrespondances(correspRef, correspType)
	{
		this.correspType = correspType;
		this.correspRef = correspRef;
	}
407

  
408
	/**
	 * Sets the data written in the TEI header by writeTXMResps(), writeTXMApps()
	 * and writeTXMTaxonomies().
	 *
	 * @param respId the responsibility ids to write
	 * @param resps responsibility id -> 4 strings: resp text, person name, date "when" value, date text
	 * @param applications responsibility id -> list of: ident, version, report file (may be null)
	 * @param taxonomies responsibility id -> targets of the tagsets referenced by the application
	 * @param items taxonomy id -> (ref type -> ref target)
	 */
	public void setHeaderInfos(respId,resps, applications, taxonomies, items)
	{
		this.items = items;
		this.taxonomies = taxonomies;
		this.applications = applications;
		this.resps = resps;
		this.respId = respId;
	}
425

  
426
	/**
	 * Writes one respStmt element per responsibility id of respId:
	 * a resp element (id = the responsibility id) holding infos[0] and a date
	 * element (when = infos[2], text = infos[3]), followed by a name element of
	 * type "person" holding infos[1].
	 */
	public void writeTXMResps()
	{
		for (String responsibility : respId) {
			String[] infos = resps.get(responsibility);

			writer.writeStartElement("respStmt");

			// resp, with its nested date
			writer.writeStartElement(RESP);
			writer.writeAttribute(ID, responsibility);
			writer.writeCharacters(infos[0]);
			writer.writeStartElement("date");
			writer.writeAttribute("when", infos[2]);
			writer.writeCharacters(infos[3]);
			writer.writeEndElement(); // date
			writer.writeEndElement(); // resp

			// name of the responsible person
			writer.writeStartElement("name");
			writer.writeAttribute(TYPE, "person");
			writer.writeCharacters(infos[1]);
			writer.writeEndElement(); // name

			writer.writeEndElement(); // respStmt
		}
	}
449

  
450
	/**
	 * Writes one txm:application element per responsibility id of respId.
	 *
	 * Each element carries the ident, version and resp attributes. When a report
	 * file is given, its non-empty lines are copied as they are into the output.
	 * An ab element of type "annotation" then lists one tagset ref per entry of
	 * taxonomies for that id.
	 *
	 * The report reader is now closed in a finally block, so it is released even
	 * when reading or writing fails.
	 */
	public void writeTXMApps()
	{
		for (String ref : respId) {
			// entry layout: [0] ident, [1] version, [2] report file (may be null)
			List<String> list= applications.get(ref);
			String ident = list.get(0);
			String version = list.get(1);
			File report = list.get(2);

			writer.writeStartElement(TXMNS, "application");
			writer.writeAttribute("ident", ident);
			writer.writeAttribute("version", version);
			writer.writeAttribute(RESP, ref);

			//get txm:commandLine from GeneratedReport
			if (report != null) {
				// presumably the empty text node ends the pending start tag, and the flush
				// pushes it out before the raw lines are written to output -- confirm
				writer.writeCharacters("");
				writer.flush();

				Reader reader = new FileReader(report);
				try {
					String line = reader.readLine();
					while (line != null) {
						if (line.length() != 0)
							output.write(line+"\n");
						line = reader.readLine();
					}
				} finally {
					reader.close();
				}
			}

			writer.writeStartElement("ab");
			writer.writeAttribute(TYPE, "annotation");
			for (String item : taxonomies.get(ref)) {
				writer.writeStartElement("list");
				writer.writeEmptyElement("ref");
				writer.writeAttribute(TYPE, "tagset");
				writer.writeAttribute("target", item);
				writer.writeEndElement(); // list
			}
			writer.writeEndElement(); // ab
			writer.writeEndElement(); // txm:application
		}
	}
492

  
493
	/**
	 * Writes one taxonomy element per entry of items. The taxonomy id is the
	 * entry key; its bibl element (type "tagset") holds a title repeating that id
	 * and one empty ref element (type and target attributes) per entry of the
	 * nested map.
	 */
	public void writeTXMTaxonomies()
	{
		for (def taxonomy : items.entrySet()) {
			String tax = taxonomy.getKey();

			writer.writeStartElement("taxonomy");
			writer.writeAttribute(ID, tax);

			writer.writeStartElement("bibl");
			writer.writeAttribute(TYPE, "tagset");

			writer.writeStartElement("title");
			writer.writeCharacters(tax);
			writer.writeEndElement(); // title

			// one ref per (type -> target) pair of this taxonomy
			for (def reference : taxonomy.getValue().entrySet()) {
				String type = reference.getKey();
				writer.writeEmptyElement("ref");
				writer.writeAttribute(TYPE, type);
				writer.writeAttribute("target", reference.getValue());
			}

			writer.writeEndElement(); // bibl
			writer.writeEndElement(); // taxonomy
		}
	}
517

  
518
	/**
519
	 * The main method.
520
	 *
521
	 * @param args the arguments
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff