Révision 967
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/frantext/frantextLoader.groovy (revision 967) | ||
---|---|---|
126 | 126 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
127 | 127 |
List<File> srcfiles = srcDir.listFiles(); |
128 | 128 |
for (File f : srcfiles) { // check XML format, and copy file into binDir |
129 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) |
|
129 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
130 | 130 |
continue; |
131 | 131 |
if (ValidateXml.test(f)) { |
132 | 132 |
FileCopy.copy(f, new File(txmDir, f.getName())); |
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xmltxm/xmltxmLoader.groovy (revision 967) | ||
---|---|---|
47 | 47 |
import org.txm.importer.xmltxm.*; |
48 | 48 |
import org.txm.metadatas.*; |
49 | 49 |
import org.txm.utils.i18n.*; |
50 |
import org.txm.utils.xml.* |
|
50 | 51 |
import org.w3c.dom.Element; |
51 | 52 |
|
52 | 53 |
String userDir = System.getProperty("user.home"); |
... | ... | |
111 | 112 |
println "Copying XML-TXM files..." |
112 | 113 |
List<File> srcfiles = srcDir.listFiles(); |
113 | 114 |
for (File f : srcfiles) {// check XML format, and copy file into binDir |
114 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) |
|
115 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
115 | 116 |
continue; |
116 | 117 |
if (ValidateXml.test(f)) { |
117 | 118 |
FileCopy.copy(f, new File(txmDir, f.getName())); |
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xml/xmlLoader.groovy (revision 967) | ||
---|---|---|
147 | 147 |
if (srcfiles != null) |
148 | 148 |
for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into binDir |
149 | 149 |
File f = srcfiles.get(i) |
150 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) { |
|
150 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
|
|
151 | 151 |
srcfiles.remove(i); |
152 | 152 |
i--; |
153 | 153 |
continue; |
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/bfm/bfmLoader.groovy (revision 967) | ||
---|---|---|
124 | 124 |
println "-- VALIDATION - checking XML source files well-formedness" |
125 | 125 |
List<File> srcfiles = srcDir.listFiles(); |
126 | 126 |
for (File f : srcfiles) { // check XML format, and copy file into binDir |
127 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) |
|
127 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
128 | 128 |
continue; |
129 | 129 |
if (ValidateXml.test(f)) { |
130 | 130 |
FileCopy.copy(f, new File(txmDir, f.getName())); |
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/factiva/factivaLoader.groovy (revision 967) | ||
---|---|---|
154 | 154 |
List<File> srcfiles = srcDir.listFiles(); |
155 | 155 |
for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into binDir |
156 | 156 |
File f = srcfiles.get(i) |
157 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) { |
|
157 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
|
|
158 | 158 |
srcfiles.remove(i); |
159 | 159 |
i--; |
160 | 160 |
continue; |
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xmltxmpara/xmltxmparaLoader.groovy (revision 967) | ||
---|---|---|
98 | 98 |
//copy txm files |
99 | 99 |
List<File> srcfiles = srcDir.listFiles(); |
100 | 100 |
for (File f : srcfiles) {// check XML format, and copy file into binDir |
101 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) |
|
101 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
102 | 102 |
continue; |
103 | 103 |
if (ValidateXml.test(f)) { |
104 | 104 |
FileCopy.copy(f, new File(paraDir, f.getName())); |
tmp/org.txm.groovy.core/src/groovy/org/txm/importer/transcriber/importer.groovy (revision 967) | ||
---|---|---|
2 | 2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
3 | 3 |
// Lyon 2, University of Franche-Comté, University of Nice |
4 | 4 |
// Sophia Antipolis, University of Paris 3. |
5 |
//
|
|
5 |
// |
|
6 | 6 |
// The TXM platform is free software: you can redistribute it |
7 | 7 |
// and/or modify it under the terms of the GNU General Public |
8 | 8 |
// License as published by the Free Software Foundation, |
9 | 9 |
// either version 2 of the License, or (at your option) any |
10 | 10 |
// later version. |
11 |
//
|
|
11 |
// |
|
12 | 12 |
// The TXM platform is distributed in the hope that it will be |
13 | 13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
14 | 14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
15 | 15 |
// PURPOSE. See the GNU General Public License for more |
16 | 16 |
// details. |
17 |
//
|
|
17 |
// |
|
18 | 18 |
// You should have received a copy of the GNU General |
19 | 19 |
// Public License along with the TXM platform. If not, see |
20 | 20 |
// http://www.gnu.org/licenses. |
21 |
//
|
|
22 |
//
|
|
23 |
//
|
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 | 24 |
// $LastChangedDate:$ |
25 | 25 |
// $LastChangedRevision:$ |
26 |
// $LastChangedBy:$
|
|
26 |
// $LastChangedBy:$ |
|
27 | 27 |
// |
28 | 28 |
|
29 | 29 |
package org.txm.importer.transcriber |
... | ... | |
81 | 81 |
|
82 | 82 |
/** The metadatas. */ |
83 | 83 |
Metadatas metadatas; |
84 |
|
|
84 |
|
|
85 | 85 |
String lang; // language used by the tokenizer |
86 | 86 |
|
87 | 87 |
/** |
... | ... | |
93 | 93 |
*/ |
94 | 94 |
public importer(ArrayList<File> trsfiles, File binDir, File txmDir, Metadatas metadatas, lang) { |
95 | 95 |
this.trsfiles = trsfiles; |
96 |
this.txmDir = txmDir;
|
|
96 |
this.txmDir = txmDir; |
|
97 | 97 |
this.binDir = binDir; |
98 | 98 |
this.metadatas = metadatas; |
99 | 99 |
this.lang = lang; |
... | ... | |
113 | 113 |
if (!txmDir.exists()) { |
114 | 114 |
println "can't create txmDir: "+txmDir.getAbsolutePath() |
115 | 115 |
} |
116 |
|
|
116 |
|
|
117 | 117 |
// TRS -> TEI |
118 | 118 |
println "Converting TRS to TEI "+trsfiles.size()+" files" |
119 | 119 |
for (File infile : trsfiles) { |
... | ... | |
131 | 131 |
println "" |
132 | 132 |
|
133 | 133 |
if (metadatas != null) { |
134 |
if (metadatas.getHeadersList().size() == 0) { |
|
135 |
println "Malformed metadata file. Check column and text separator. Columns: "+metadatas.getHeadersList() |
|
136 |
return false; |
|
137 |
} |
|
138 |
println "Injecting metadatas "+metadatas.getHeadersList()+" in "+trsfiles.size()+" files" |
|
139 |
} |
|
140 |
trsfiles = txmDir.listFiles(); |
|
141 |
trsfiles.sort() |
|
142 |
for (File infile : trsfiles) { |
|
143 |
File outfile = new File(txmDir, "tmp.xml") |
|
144 |
if (metadatas != null && metadatas.isInitialized()) { |
|
145 |
print "." |
|
146 |
if (!metadatas.injectMetadatasInXml(infile, outfile, "text")) { |
|
147 |
println("Failed to inject metadatas in "+infile) |
|
148 |
outfile.delete() |
|
134 |
if (metadatas.getHeadersList().size() > 0) { |
|
135 |
|
|
136 |
println "Injecting metadatas "+metadatas.getHeadersList()+" in "+trsfiles.size()+" files" |
|
137 |
|
|
138 |
trsfiles = txmDir.listFiles(); |
|
139 |
trsfiles.sort() |
|
140 |
for (File infile : trsfiles) { |
|
141 |
File outfile = new File(txmDir, "tmp.xml") |
|
142 |
if (metadatas != null && metadatas.isInitialized()) { |
|
143 |
print "." |
|
144 |
if (!metadatas.injectMetadatasInXml(infile, outfile, "text")) { |
|
145 |
println("Failed to inject metadatas in "+infile) |
|
146 |
outfile.delete() |
|
147 |
} |
|
148 |
if (!infile.delete()) { |
|
149 |
println "ERROR: could not delete $infile" |
|
150 |
return false |
|
151 |
} |
|
152 |
outfile.renameTo(infile) |
|
153 |
} |
|
149 | 154 |
} |
150 |
if (!infile.delete()) { |
|
151 |
println "ERROR: could not delete $infile" |
|
152 |
return false |
|
153 |
} |
|
154 |
outfile.renameTo(infile) |
|
155 | 155 |
} |
156 | 156 |
} |
157 |
|
|
157 |
|
|
158 | 158 |
println "" |
159 |
|
|
159 |
|
|
160 | 160 |
// TOKENIZER ENTITIES |
161 | 161 |
println "Tokenizing entities "+txmDir.listFiles().length+" files" |
162 | 162 |
for (File pfile : txmDir.listFiles()) { |
... | ... | |
165 | 165 |
File outfile = File.createTempFile("tok", ".xml", pfile.getParentFile()); |
166 | 166 |
if (tokenizer.process(outfile)) { |
167 | 167 |
if (!(pfile.delete() && outfile.renameTo(pfile))) println "Warning can't rename file "+outfile+" to "+pfile |
168 |
}
|
|
168 |
} |
|
169 | 169 |
outfile.delete(); |
170 | 170 |
} |
171 | 171 |
println "" |
172 |
|
|
172 |
|
|
173 | 173 |
//TOKENIZE |
174 | 174 |
println "Tokenizing "+txmDir.listFiles().length+" files from $txmDir" |
175 | 175 |
File tokenizedDir = new File(binDir, "tokenized") |
... | ... | |
190 | 190 |
} |
191 | 191 |
} |
192 | 192 |
println "" |
193 |
|
|
193 |
|
|
194 | 194 |
//TRANSFORM INTO XML-TEI-TXM |
195 | 195 |
println("Building XML-TXM ("+txmDir.listFiles().length+" files)") |
196 | 196 |
for (File tfile : tokenizedDir.listFiles()) { |
197 | 197 |
print "." |
198 | 198 |
String filename = tfile.getName().substring(0, tfile.getName().length()-4) |
199 | 199 |
File xmlfile = new File(txmDir, tfile.getName()) |
200 |
|
|
200 |
|
|
201 | 201 |
def correspType = new HashMap<String,String>() |
202 | 202 |
correspType.put("event","event"); |
203 | 203 |
correspType.put("audio","audio"); |
... | ... | |
227 | 227 |
def resps = new HashMap<String,String[]>(); |
228 | 228 |
resps.put("trs", ["Transcriber annotations","TXM","",""]) |
229 | 229 |
String wordprefix = "w_"; |
230 |
|
|
230 |
|
|
231 | 231 |
Xml2Ana builder = new Xml2Ana(tfile); |
232 | 232 |
builder.setConvertAllAtrtibutes true; |
233 | 233 |
builder.setCorrespondances(correspRef, correspType); |
... | ... | |
237 | 237 |
xmlfile.delete(); |
238 | 238 |
} |
239 | 239 |
} |
240 |
|
|
240 |
|
|
241 | 241 |
println "" |
242 | 242 |
return txmDir.listFiles() != null; |
243 | 243 |
} |
... | ... | |
251 | 251 |
* @return true, if successful |
252 | 252 |
*/ |
253 | 253 |
public boolean process(File infile, File outfile, ArrayList<Pair<String, String>> metas) { |
254 |
//inject metadatas into
|
|
254 |
//inject metadatas into |
|
255 | 255 |
this.infile = infile; |
256 | 256 |
this.outfile = outfile; |
257 | 257 |
def factory = DocumentBuilderFactory.newInstance() |
... | ... | |
272 | 272 |
println ("insert $pairs into $xpath") |
273 | 273 |
def expr = XPathFactory.newInstance().newXPath().compile(xpath) |
274 | 274 |
def nodes = expr.evaluate(doc, XPathConstants.NODESET) |
275 |
|
|
275 |
|
|
276 | 276 |
for (Node node : nodes) { |
277 | 277 |
Element elem = (Element)node; |
278 | 278 |
for (Pair<String, String> p : pairs) { |
... | ... | |
290 | 290 |
try { |
291 | 291 |
// Création de la source DOM |
292 | 292 |
Source source = new DOMSource(doc); |
293 |
|
|
293 |
|
|
294 | 294 |
// Création du fichier de sortie |
295 |
Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
|
|
295 |
Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8")); |
|
296 | 296 |
Result resultat = new StreamResult(writer); |
297 |
|
|
297 |
|
|
298 | 298 |
// Configuration du transformer |
299 | 299 |
TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl(); |
300 | 300 |
Transformer transformer = fabrique.newTransformer(); |
301 | 301 |
transformer.setOutputProperty(OutputKeys.METHOD, "xml"); |
302 |
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
|
|
303 |
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
|
|
304 |
|
|
302 |
transformer.setOutputProperty(OutputKeys.INDENT, "yes"); |
|
303 |
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); |
|
304 |
|
|
305 | 305 |
// Transformation |
306 | 306 |
transformer.transform(source, resultat); |
307 | 307 |
writer.close(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/macroproto/importer/XTZImporterMacro.groovy (revision 967) | ||
---|---|---|
210 | 210 |
if (srcfiles != null) |
211 | 211 |
for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into binDir |
212 | 212 |
File f = srcfiles.get(i) |
213 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) { |
|
213 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
|
|
214 | 214 |
srcfiles.remove(i); |
215 | 215 |
i--; |
216 | 216 |
continue; |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/srcmfLoader.groovy (revision 967) | ||
---|---|---|
103 | 103 |
// copy txm files |
104 | 104 |
List<File> srcfiles = txmSrcDir.listFiles(); |
105 | 105 |
for (File f : srcfiles) {// check XML format, and copy file into binDir |
106 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) |
|
106 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
107 | 107 |
continue; |
108 | 108 |
if (ValidateXml.test(f)) { |
109 | 109 |
FileCopy.copy(f, new File(txmDir, f.getName())); |
tmp/org.txm.core/src/java/org/txm/metadatas/Metadatas.java (revision 967) | ||
---|---|---|
138 | 138 |
f = new File(directory, "metadata.tsv"); |
139 | 139 |
if (f.exists()) return f; |
140 | 140 |
|
141 |
f = new File(directory, "metadata.csv"); |
|
142 |
if (f.exists()) return f; |
|
143 |
|
|
144 |
return null; |
|
141 |
return new File(directory, "metadata.csv"); |
|
145 | 142 |
} |
146 | 143 |
|
147 | 144 |
/** |
... | ... | |
397 | 394 |
|
398 | 395 |
if (headers.length == 0) |
399 | 396 |
{ |
400 |
System.out.println("Error: No header in the metadata file "+csvfile); |
|
397 |
System.out.println("Error: No header in the metadata file "+csvfile+" with separators: column='"+separator+"' and text='"+txtseparator+"'"); |
|
398 |
writer.close(); |
|
399 |
output.close(); |
|
401 | 400 |
return false; |
402 | 401 |
} |
403 | 402 |
|
404 | 403 |
if(!headers[0].equals("id")) |
405 | 404 |
{ |
406 |
System.out.println("Error: The first column name in the header line of the metadata file '$csvfile' must be 'id' and found '"+headers[0]+"'"); |
|
407 |
return false; |
|
405 |
System.out.println("Error: The first column name in the header line of the metadata file '$csvfile' must be 'id' and found '"+headers[0]+"' column separator='\"+separator+\"' and text separator='\"+txtseparator+\"'"); |
|
406 |
writer.close(); |
|
407 |
output.close(); |
|
408 |
if (!separator.equals("\t")) { |
|
409 |
System.out.println("\tTrying with separators: column='\t' and text=''..."); |
|
410 |
return convertCsvToXml(csvfile, xmlFile, encoding, "\t", "", nbheaderline); |
|
411 |
} |
|
408 | 412 |
} |
409 | 413 |
|
410 | 414 |
//check for double columns |
Formats disponibles : Unified diff