Revision 967

tmp/org.txm.core/src/java/org/txm/metadatas/Metadatas.java (revision 967)
138 138
		f = new File(directory, "metadata.tsv");
139 139
		if (f.exists()) return f;
140 140
		
141
		f = new File(directory, "metadata.csv");
142
		if (f.exists()) return f;
143
		
144
		return null;
141
		return  new File(directory, "metadata.csv");
145 142
	}
146 143
	
147 144
	/**
......
397 394

  
398 395
		if (headers.length == 0)
399 396
		{
400
			System.out.println("Error: No header in the metadata file "+csvfile);
397
			System.out.println("Error: No header in the metadata file "+csvfile+" with separators: column='"+separator+"' and text='"+txtseparator+"'");
398
			writer.close();
399
			output.close();
401 400
			return false;
402 401
		}
403 402

  
404 403
		if(!headers[0].equals("id"))
405 404
		{
406
			System.out.println("Error: The first column name in the header line of the metadata file '$csvfile' must be 'id' and found '"+headers[0]+"'");
407
			return false;
405
			System.out.println("Error: The first column name in the header line of the metadata file '$csvfile' must be 'id' and found '"+headers[0]+"' column separator='\"+separator+\"' and text separator='\"+txtseparator+\"'");
406
			writer.close();
407
			output.close();
408
			if (!separator.equals("\t")) {
409
				System.out.println("\tTrying with separators: column='\t' and text=''...");
410
				return convertCsvToXml(csvfile, xmlFile, encoding, "\t", "", nbheaderline);
411
			}
408 412
		}
409 413

  
410 414
		//check for double columns
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xmltxm/xmltxmLoader.groovy (revision 967)
47 47
import org.txm.importer.xmltxm.*;
48 48
import org.txm.metadatas.*;
49 49
import org.txm.utils.i18n.*;
50
import org.txm.utils.xml.*
50 51
import org.w3c.dom.Element;
51 52

  
52 53
String userDir = System.getProperty("user.home");
......
111 112
println "Copying XML-TXM files..."
112 113
List<File> srcfiles = srcDir.listFiles();
113 114
for (File f : srcfiles) {// check XML format, and copy file into binDir
114
	if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
115
	if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
115 116
		continue;
116 117
	if (ValidateXml.test(f)) {
117 118
		FileCopy.copy(f, new File(txmDir, f.getName()));
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/frantext/frantextLoader.groovy (revision 967)
126 126
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
127 127
List<File> srcfiles = srcDir.listFiles();
128 128
for (File f : srcfiles) { // check XML format, and copy file into binDir
129
	if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
129
	if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
130 130
		continue;
131 131
	if (ValidateXml.test(f)) {
132 132
		FileCopy.copy(f, new File(txmDir, f.getName()));
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xmltxmpara/xmltxmparaLoader.groovy (revision 967)
98 98
//copy txm files
99 99
List<File> srcfiles = srcDir.listFiles();
100 100
for (File f : srcfiles) {// check XML format, and copy file into binDir
101
	if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
101
	if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
102 102
	continue;
103 103
	if (ValidateXml.test(f)) {
104 104
		FileCopy.copy(f, new File(paraDir, f.getName()));
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xml/xmlLoader.groovy (revision 967)
147 147
if (srcfiles != null)
148 148
	for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into binDir
149 149
		File f = srcfiles.get(i)
150
		if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) {
150
		if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
151 151
			srcfiles.remove(i);
152 152
			i--;
153 153
			continue;
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/bfm/bfmLoader.groovy (revision 967)
124 124
println "-- VALIDATION - checking XML source files well-formedness"
125 125
List<File> srcfiles = srcDir.listFiles();
126 126
for (File f : srcfiles) { // check XML format, and copy file into binDir
127
	if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
127
	if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
128 128
		continue;
129 129
	if (ValidateXml.test(f)) {
130 130
		FileCopy.copy(f, new File(txmDir, f.getName()));
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/factiva/factivaLoader.groovy (revision 967)
154 154
List<File> srcfiles = srcDir.listFiles();
155 155
for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into binDir
156 156
	File f = srcfiles.get(i)
157
	if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) {
157
	if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
158 158
		srcfiles.remove(i);
159 159
		i--;
160 160
		continue;
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/transcriber/importer.groovy (revision 967)
2 2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 3
// Lyon 2, University of Franche-Comté, University of Nice
4 4
// Sophia Antipolis, University of Paris 3.
5
// 
5
//
6 6
// The TXM platform is free software: you can redistribute it
7 7
// and/or modify it under the terms of the GNU General Public
8 8
// License as published by the Free Software Foundation,
9 9
// either version 2 of the License, or (at your option) any
10 10
// later version.
11
// 
11
//
12 12
// The TXM platform is distributed in the hope that it will be
13 13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 15
// PURPOSE. See the GNU General Public License for more
16 16
// details.
17
// 
17
//
18 18
// You should have received a copy of the GNU General
19 19
// Public License along with the TXM platform. If not, see
20 20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
21
//
22
//
23
//
24 24
// $LastChangedDate:$
25 25
// $LastChangedRevision:$
26
// $LastChangedBy:$ 
26
// $LastChangedBy:$
27 27
//
28 28

  
29 29
package org.txm.importer.transcriber
......
81 81

  
82 82
	/** The metadatas. */
83 83
	Metadatas metadatas;
84
	
84

  
85 85
	String lang; // language used by the tokenizer
86 86

  
87 87
	/**
......
93 93
	 */
94 94
	public importer(ArrayList<File> trsfiles, File binDir, File txmDir, Metadatas metadatas, lang) {
95 95
		this.trsfiles = trsfiles;
96
		this.txmDir = txmDir; 
96
		this.txmDir = txmDir;
97 97
		this.binDir = binDir;
98 98
		this.metadatas = metadatas;
99 99
		this.lang = lang;
......
113 113
		if (!txmDir.exists()) {
114 114
			println "can't create txmDir: "+txmDir.getAbsolutePath()
115 115
		}
116
		
116

  
117 117
		// TRS -> TEI
118 118
		println "Converting TRS to TEI "+trsfiles.size()+" files"
119 119
		for (File infile : trsfiles) {
......
131 131
		println ""
132 132

  
133 133
		if (metadatas != null) {
134
			if (metadatas.getHeadersList().size() == 0) {
135
				println "Malformed metadata file. Check column and text separator. Columns: "+metadatas.getHeadersList()
136
				return false;
137
			}
138
			println "Injecting metadatas "+metadatas.getHeadersList()+" in "+trsfiles.size()+" files"
139
		}
140
		trsfiles = txmDir.listFiles();
141
		trsfiles.sort()
142
		for (File infile : trsfiles) {
143
			File outfile = new File(txmDir, "tmp.xml")
144
			if (metadatas != null && metadatas.isInitialized()) {
145
				print "."
146
				if (!metadatas.injectMetadatasInXml(infile, outfile, "text")) {
147
					println("Failed to inject metadatas in "+infile)
148
					outfile.delete()
134
			if (metadatas.getHeadersList().size() > 0) {
135

  
136
				println "Injecting metadatas "+metadatas.getHeadersList()+" in "+trsfiles.size()+" files"
137

  
138
				trsfiles = txmDir.listFiles();
139
				trsfiles.sort()
140
				for (File infile : trsfiles) {
141
					File outfile = new File(txmDir, "tmp.xml")
142
					if (metadatas != null && metadatas.isInitialized()) {
143
						print "."
144
						if (!metadatas.injectMetadatasInXml(infile, outfile, "text")) {
145
							println("Failed to inject metadatas in "+infile)
146
							outfile.delete()
147
						}
148
						if (!infile.delete()) {
149
							println "ERROR: could not delete $infile"
150
							return false
151
						}
152
						outfile.renameTo(infile)
153
					}
149 154
				}
150
				if (!infile.delete()) {
151
					println "ERROR: could not delete $infile"
152
					return false
153
				}
154
				outfile.renameTo(infile)
155 155
			}
156 156
		}
157
		
157

  
158 158
		println ""
159
		
159

  
160 160
		// TOKENIZER ENTITIES
161 161
		println "Tokenizing entities "+txmDir.listFiles().length+" files"
162 162
		for (File pfile : txmDir.listFiles()) {
......
165 165
			File outfile = File.createTempFile("tok", ".xml", pfile.getParentFile());
166 166
			if (tokenizer.process(outfile)) {
167 167
				if (!(pfile.delete() && outfile.renameTo(pfile))) println "Warning can't rename file "+outfile+" to "+pfile
168
			} 
168
			}
169 169
			outfile.delete();
170 170
		}
171 171
		println ""
172
		
172

  
173 173
		//TOKENIZE
174 174
		println "Tokenizing "+txmDir.listFiles().length+" files from $txmDir"
175 175
		File tokenizedDir = new File(binDir, "tokenized")
......
190 190
			}
191 191
		}
192 192
		println ""
193
		
193

  
194 194
		//TRANSFORM INTO XML-TEI-TXM
195 195
		println("Building XML-TXM ("+txmDir.listFiles().length+" files)")
196 196
		for (File tfile : tokenizedDir.listFiles()) {
197 197
			print "."
198 198
			String filename = tfile.getName().substring(0, tfile.getName().length()-4)
199 199
			File xmlfile = new File(txmDir, tfile.getName())
200
			
200

  
201 201
			def correspType = new HashMap<String,String>()
202 202
			correspType.put("event","event");
203 203
			correspType.put("audio","audio");
......
227 227
			def resps = new HashMap<String,String[]>();
228 228
			resps.put("trs", ["Transcriber annotations","TXM","",""])
229 229
			String wordprefix = "w_";
230
			
230

  
231 231
			Xml2Ana builder = new Xml2Ana(tfile);
232 232
			builder.setConvertAllAtrtibutes true;
233 233
			builder.setCorrespondances(correspRef, correspType);
......
237 237
				xmlfile.delete();
238 238
			}
239 239
		}
240
		
240

  
241 241
		println ""
242 242
		return txmDir.listFiles() != null;
243 243
	}
......
251 251
	 * @return true, if successful
252 252
	 */
253 253
	public boolean process(File infile, File outfile, ArrayList<Pair<String, String>> metas) {
254
		//inject metadatas into 	
254
		//inject metadatas into
255 255
		this.infile = infile;
256 256
		this.outfile = outfile;
257 257
		def factory = DocumentBuilderFactory.newInstance()
......
272 272
		println ("insert $pairs into $xpath")
273 273
		def expr = XPathFactory.newInstance().newXPath().compile(xpath)
274 274
		def nodes = expr.evaluate(doc, XPathConstants.NODESET)
275
		
275

  
276 276
		for (Node node : nodes) {
277 277
			Element elem = (Element)node;
278 278
			for (Pair<String, String> p : pairs) {
......
290 290
		try {
291 291
			// Création de la source DOM
292 292
			Source source = new DOMSource(doc);
293
			
293

  
294 294
			// Création du fichier de sortie
295
			Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8")); 
295
			Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
296 296
			Result resultat = new StreamResult(writer);
297
			
297

  
298 298
			// Configuration du transformer
299 299
			TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl();
300 300
			Transformer transformer = fabrique.newTransformer();
301 301
			transformer.setOutputProperty(OutputKeys.METHOD, "xml");
302
			transformer.setOutputProperty(OutputKeys.INDENT, "yes"); 
303
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); 
304
			
302
			transformer.setOutputProperty(OutputKeys.INDENT, "yes");
303
			transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
304

  
305 305
			// Transformation
306 306
			transformer.transform(source, resultat);
307 307
			writer.close();
tmp/org.txm.groovy.core/src/groovy/org/txm/macroproto/importer/XTZImporterMacro.groovy (revision 967)
210 210
if (srcfiles != null)
211 211
for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into binDir
212 212
	File f = srcfiles.get(i)
213
	if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) {
213
	if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
214 214
		srcfiles.remove(i);
215 215
		i--;
216 216
		continue;
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/srcmfLoader.groovy (revision 967)
103 103
// copy txm files
104 104
List<File> srcfiles = txmSrcDir.listFiles();
105 105
for (File f : srcfiles) {// check XML format, and copy file into binDir
106
	if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
106
	if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
107 107
		continue;
108 108
	if (ValidateXml.test(f)) {
109 109
		FileCopy.copy(f, new File(txmDir, f.getName()));

Also available in: Unified diff