Révision 967
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/frantext/frantextLoader.groovy (revision 967) | ||
|---|---|---|
| 126 | 126 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
| 127 | 127 |
List<File> srcfiles = srcDir.listFiles(); |
| 128 | 128 |
for (File f : srcfiles) { // check XML format, and copy file into binDir
|
| 129 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
|
|
| 129 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
| 130 | 130 |
continue; |
| 131 | 131 |
if (ValidateXml.test(f)) {
|
| 132 | 132 |
FileCopy.copy(f, new File(txmDir, f.getName())); |
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xmltxm/xmltxmLoader.groovy (revision 967) | ||
|---|---|---|
| 47 | 47 |
import org.txm.importer.xmltxm.*; |
| 48 | 48 |
import org.txm.metadatas.*; |
| 49 | 49 |
import org.txm.utils.i18n.*; |
| 50 |
import org.txm.utils.xml.* |
|
| 50 | 51 |
import org.w3c.dom.Element; |
| 51 | 52 |
|
| 52 | 53 |
String userDir = System.getProperty("user.home");
|
| ... | ... | |
| 111 | 112 |
println "Copying XML-TXM files..." |
| 112 | 113 |
List<File> srcfiles = srcDir.listFiles(); |
| 113 | 114 |
for (File f : srcfiles) {// check XML format, and copy file into binDir
|
| 114 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
|
|
| 115 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
| 115 | 116 |
continue; |
| 116 | 117 |
if (ValidateXml.test(f)) {
|
| 117 | 118 |
FileCopy.copy(f, new File(txmDir, f.getName())); |
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xml/xmlLoader.groovy (revision 967) | ||
|---|---|---|
| 147 | 147 |
if (srcfiles != null) |
| 148 | 148 |
for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into binDir
|
| 149 | 149 |
File f = srcfiles.get(i) |
| 150 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) {
|
|
| 150 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
|
|
| 151 | 151 |
srcfiles.remove(i); |
| 152 | 152 |
i--; |
| 153 | 153 |
continue; |
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/bfm/bfmLoader.groovy (revision 967) | ||
|---|---|---|
| 124 | 124 |
println "-- VALIDATION - checking XML source files well-formedness" |
| 125 | 125 |
List<File> srcfiles = srcDir.listFiles(); |
| 126 | 126 |
for (File f : srcfiles) { // check XML format, and copy file into binDir
|
| 127 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
|
|
| 127 |
if (f.isHidden() || f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
| 128 | 128 |
continue; |
| 129 | 129 |
if (ValidateXml.test(f)) {
|
| 130 | 130 |
FileCopy.copy(f, new File(txmDir, f.getName())); |
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/factiva/factivaLoader.groovy (revision 967) | ||
|---|---|---|
| 154 | 154 |
List<File> srcfiles = srcDir.listFiles(); |
| 155 | 155 |
for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into binDir
|
| 156 | 156 |
File f = srcfiles.get(i) |
| 157 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) {
|
|
| 157 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
|
|
| 158 | 158 |
srcfiles.remove(i); |
| 159 | 159 |
i--; |
| 160 | 160 |
continue; |
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/xmltxmpara/xmltxmparaLoader.groovy (revision 967) | ||
|---|---|---|
| 98 | 98 |
//copy txm files |
| 99 | 99 |
List<File> srcfiles = srcDir.listFiles(); |
| 100 | 100 |
for (File f : srcfiles) {// check XML format, and copy file into binDir
|
| 101 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
|
|
| 101 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
| 102 | 102 |
continue; |
| 103 | 103 |
if (ValidateXml.test(f)) {
|
| 104 | 104 |
FileCopy.copy(f, new File(paraDir, f.getName())); |
| tmp/org.txm.groovy.core/src/groovy/org/txm/importer/transcriber/importer.groovy (revision 967) | ||
|---|---|---|
| 2 | 2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
| 3 | 3 |
// Lyon 2, University of Franche-Comté, University of Nice |
| 4 | 4 |
// Sophia Antipolis, University of Paris 3. |
| 5 |
//
|
|
| 5 |
// |
|
| 6 | 6 |
// The TXM platform is free software: you can redistribute it |
| 7 | 7 |
// and/or modify it under the terms of the GNU General Public |
| 8 | 8 |
// License as published by the Free Software Foundation, |
| 9 | 9 |
// either version 2 of the License, or (at your option) any |
| 10 | 10 |
// later version. |
| 11 |
//
|
|
| 11 |
// |
|
| 12 | 12 |
// The TXM platform is distributed in the hope that it will be |
| 13 | 13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
| 14 | 14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
| 15 | 15 |
// PURPOSE. See the GNU General Public License for more |
| 16 | 16 |
// details. |
| 17 |
//
|
|
| 17 |
// |
|
| 18 | 18 |
// You should have received a copy of the GNU General |
| 19 | 19 |
// Public License along with the TXM platform. If not, see |
| 20 | 20 |
// http://www.gnu.org/licenses. |
| 21 |
//
|
|
| 22 |
//
|
|
| 23 |
//
|
|
| 21 |
// |
|
| 22 |
// |
|
| 23 |
// |
|
| 24 | 24 |
// $LastChangedDate:$ |
| 25 | 25 |
// $LastChangedRevision:$ |
| 26 |
// $LastChangedBy:$
|
|
| 26 |
// $LastChangedBy:$ |
|
| 27 | 27 |
// |
| 28 | 28 |
|
| 29 | 29 |
package org.txm.importer.transcriber |
| ... | ... | |
| 81 | 81 |
|
| 82 | 82 |
/** The metadatas. */ |
| 83 | 83 |
Metadatas metadatas; |
| 84 |
|
|
| 84 |
|
|
| 85 | 85 |
String lang; // language used by the tokenizer |
| 86 | 86 |
|
| 87 | 87 |
/** |
| ... | ... | |
| 93 | 93 |
*/ |
| 94 | 94 |
public importer(ArrayList<File> trsfiles, File binDir, File txmDir, Metadatas metadatas, lang) {
|
| 95 | 95 |
this.trsfiles = trsfiles; |
| 96 |
this.txmDir = txmDir;
|
|
| 96 |
this.txmDir = txmDir; |
|
| 97 | 97 |
this.binDir = binDir; |
| 98 | 98 |
this.metadatas = metadatas; |
| 99 | 99 |
this.lang = lang; |
| ... | ... | |
| 113 | 113 |
if (!txmDir.exists()) {
|
| 114 | 114 |
println "can't create txmDir: "+txmDir.getAbsolutePath() |
| 115 | 115 |
} |
| 116 |
|
|
| 116 |
|
|
| 117 | 117 |
// TRS -> TEI |
| 118 | 118 |
println "Converting TRS to TEI "+trsfiles.size()+" files" |
| 119 | 119 |
for (File infile : trsfiles) {
|
| ... | ... | |
| 131 | 131 |
println "" |
| 132 | 132 |
|
| 133 | 133 |
if (metadatas != null) {
|
| 134 |
if (metadatas.getHeadersList().size() == 0) {
|
|
| 135 |
println "Malformed metadata file. Check column and text separator. Columns: "+metadatas.getHeadersList() |
|
| 136 |
return false; |
|
| 137 |
} |
|
| 138 |
println "Injecting metadatas "+metadatas.getHeadersList()+" in "+trsfiles.size()+" files" |
|
| 139 |
} |
|
| 140 |
trsfiles = txmDir.listFiles(); |
|
| 141 |
trsfiles.sort() |
|
| 142 |
for (File infile : trsfiles) {
|
|
| 143 |
File outfile = new File(txmDir, "tmp.xml") |
|
| 144 |
if (metadatas != null && metadatas.isInitialized()) {
|
|
| 145 |
print "." |
|
| 146 |
if (!metadatas.injectMetadatasInXml(infile, outfile, "text")) {
|
|
| 147 |
println("Failed to inject metadatas in "+infile)
|
|
| 148 |
outfile.delete() |
|
| 134 |
if (metadatas.getHeadersList().size() > 0) {
|
|
| 135 |
|
|
| 136 |
println "Injecting metadatas "+metadatas.getHeadersList()+" in "+trsfiles.size()+" files" |
|
| 137 |
|
|
| 138 |
trsfiles = txmDir.listFiles(); |
|
| 139 |
trsfiles.sort() |
|
| 140 |
for (File infile : trsfiles) {
|
|
| 141 |
File outfile = new File(txmDir, "tmp.xml") |
|
| 142 |
if (metadatas != null && metadatas.isInitialized()) {
|
|
| 143 |
print "." |
|
| 144 |
if (!metadatas.injectMetadatasInXml(infile, outfile, "text")) {
|
|
| 145 |
println("Failed to inject metadatas in "+infile)
|
|
| 146 |
outfile.delete() |
|
| 147 |
} |
|
| 148 |
if (!infile.delete()) {
|
|
| 149 |
println "ERROR: could not delete $infile" |
|
| 150 |
return false |
|
| 151 |
} |
|
| 152 |
outfile.renameTo(infile) |
|
| 153 |
} |
|
| 149 | 154 |
} |
| 150 |
if (!infile.delete()) {
|
|
| 151 |
println "ERROR: could not delete $infile" |
|
| 152 |
return false |
|
| 153 |
} |
|
| 154 |
outfile.renameTo(infile) |
|
| 155 | 155 |
} |
| 156 | 156 |
} |
| 157 |
|
|
| 157 |
|
|
| 158 | 158 |
println "" |
| 159 |
|
|
| 159 |
|
|
| 160 | 160 |
// TOKENIZER ENTITIES |
| 161 | 161 |
println "Tokenizing entities "+txmDir.listFiles().length+" files" |
| 162 | 162 |
for (File pfile : txmDir.listFiles()) {
|
| ... | ... | |
| 165 | 165 |
File outfile = File.createTempFile("tok", ".xml", pfile.getParentFile());
|
| 166 | 166 |
if (tokenizer.process(outfile)) {
|
| 167 | 167 |
if (!(pfile.delete() && outfile.renameTo(pfile))) println "Warning can't rename file "+outfile+" to "+pfile |
| 168 |
}
|
|
| 168 |
} |
|
| 169 | 169 |
outfile.delete(); |
| 170 | 170 |
} |
| 171 | 171 |
println "" |
| 172 |
|
|
| 172 |
|
|
| 173 | 173 |
//TOKENIZE |
| 174 | 174 |
println "Tokenizing "+txmDir.listFiles().length+" files from $txmDir" |
| 175 | 175 |
File tokenizedDir = new File(binDir, "tokenized") |
| ... | ... | |
| 190 | 190 |
} |
| 191 | 191 |
} |
| 192 | 192 |
println "" |
| 193 |
|
|
| 193 |
|
|
| 194 | 194 |
//TRANSFORM INTO XML-TEI-TXM |
| 195 | 195 |
println("Building XML-TXM ("+txmDir.listFiles().length+" files)")
|
| 196 | 196 |
for (File tfile : tokenizedDir.listFiles()) {
|
| 197 | 197 |
print "." |
| 198 | 198 |
String filename = tfile.getName().substring(0, tfile.getName().length()-4) |
| 199 | 199 |
File xmlfile = new File(txmDir, tfile.getName()) |
| 200 |
|
|
| 200 |
|
|
| 201 | 201 |
def correspType = new HashMap<String,String>() |
| 202 | 202 |
correspType.put("event","event");
|
| 203 | 203 |
correspType.put("audio","audio");
|
| ... | ... | |
| 227 | 227 |
def resps = new HashMap<String,String[]>(); |
| 228 | 228 |
resps.put("trs", ["Transcriber annotations","TXM","",""])
|
| 229 | 229 |
String wordprefix = "w_"; |
| 230 |
|
|
| 230 |
|
|
| 231 | 231 |
Xml2Ana builder = new Xml2Ana(tfile); |
| 232 | 232 |
builder.setConvertAllAtrtibutes true; |
| 233 | 233 |
builder.setCorrespondances(correspRef, correspType); |
| ... | ... | |
| 237 | 237 |
xmlfile.delete(); |
| 238 | 238 |
} |
| 239 | 239 |
} |
| 240 |
|
|
| 240 |
|
|
| 241 | 241 |
println "" |
| 242 | 242 |
return txmDir.listFiles() != null; |
| 243 | 243 |
} |
| ... | ... | |
| 251 | 251 |
* @return true, if successful |
| 252 | 252 |
*/ |
| 253 | 253 |
public boolean process(File infile, File outfile, ArrayList<Pair<String, String>> metas) {
|
| 254 |
//inject metadatas into
|
|
| 254 |
//inject metadatas into |
|
| 255 | 255 |
this.infile = infile; |
| 256 | 256 |
this.outfile = outfile; |
| 257 | 257 |
def factory = DocumentBuilderFactory.newInstance() |
| ... | ... | |
| 272 | 272 |
println ("insert $pairs into $xpath")
|
| 273 | 273 |
def expr = XPathFactory.newInstance().newXPath().compile(xpath) |
| 274 | 274 |
def nodes = expr.evaluate(doc, XPathConstants.NODESET) |
| 275 |
|
|
| 275 |
|
|
| 276 | 276 |
for (Node node : nodes) {
|
| 277 | 277 |
Element elem = (Element)node; |
| 278 | 278 |
for (Pair<String, String> p : pairs) {
|
| ... | ... | |
| 290 | 290 |
try {
|
| 291 | 291 |
// Création de la source DOM |
| 292 | 292 |
Source source = new DOMSource(doc); |
| 293 |
|
|
| 293 |
|
|
| 294 | 294 |
// Création du fichier de sortie |
| 295 |
Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
|
|
| 295 |
Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8")); |
|
| 296 | 296 |
Result resultat = new StreamResult(writer); |
| 297 |
|
|
| 297 |
|
|
| 298 | 298 |
// Configuration du transformer |
| 299 | 299 |
TransformerFactory fabrique = new net.sf.saxon.TransformerFactoryImpl(); |
| 300 | 300 |
Transformer transformer = fabrique.newTransformer(); |
| 301 | 301 |
transformer.setOutputProperty(OutputKeys.METHOD, "xml"); |
| 302 |
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
|
|
| 303 |
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
|
|
| 304 |
|
|
| 302 |
transformer.setOutputProperty(OutputKeys.INDENT, "yes"); |
|
| 303 |
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); |
|
| 304 |
|
|
| 305 | 305 |
// Transformation |
| 306 | 306 |
transformer.transform(source, resultat); |
| 307 | 307 |
writer.close(); |
| tmp/org.txm.groovy.core/src/groovy/org/txm/macroproto/importer/XTZImporterMacro.groovy (revision 967) | ||
|---|---|---|
| 210 | 210 |
if (srcfiles != null) |
| 211 | 211 |
for (int i = 0 ; i < srcfiles.size() ; i++) {// check XML format, and copy file into binDir
|
| 212 | 212 |
File f = srcfiles.get(i) |
| 213 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties")) {
|
|
| 213 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties")) {
|
|
| 214 | 214 |
srcfiles.remove(i); |
| 215 | 215 |
i--; |
| 216 | 216 |
continue; |
| tmp/org.txm.tigersearch.rcp/groovy/org/txm/importer/srcmf/srcmfLoader.groovy (revision 967) | ||
|---|---|---|
| 103 | 103 |
// copy txm files |
| 104 | 104 |
List<File> srcfiles = txmSrcDir.listFiles(); |
| 105 | 105 |
for (File f : srcfiles) {// check XML format, and copy file into binDir
|
| 106 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\.....?") || f.getName().endsWith(".properties"))
|
|
| 106 |
if (f.getName().equals("import.xml") || f.getName().matches("metadata\\.....?") || f.getName().endsWith(".properties"))
|
|
| 107 | 107 |
continue; |
| 108 | 108 |
if (ValidateXml.test(f)) {
|
| 109 | 109 |
FileCopy.copy(f, new File(txmDir, f.getName())); |
| tmp/org.txm.core/src/java/org/txm/metadatas/Metadatas.java (revision 967) | ||
|---|---|---|
| 138 | 138 |
f = new File(directory, "metadata.tsv"); |
| 139 | 139 |
if (f.exists()) return f; |
| 140 | 140 |
|
| 141 |
f = new File(directory, "metadata.csv"); |
|
| 142 |
if (f.exists()) return f; |
|
| 143 |
|
|
| 144 |
return null; |
|
| 141 |
return new File(directory, "metadata.csv"); |
|
| 145 | 142 |
} |
| 146 | 143 |
|
| 147 | 144 |
/** |
| ... | ... | |
| 397 | 394 |
|
| 398 | 395 |
if (headers.length == 0) |
| 399 | 396 |
{
|
| 400 |
System.out.println("Error: No header in the metadata file "+csvfile);
|
|
| 397 |
System.out.println("Error: No header in the metadata file "+csvfile+" with separators: column='"+separator+"' and text='"+txtseparator+"'");
|
|
| 398 |
writer.close(); |
|
| 399 |
output.close(); |
|
| 401 | 400 |
return false; |
| 402 | 401 |
} |
| 403 | 402 |
|
| 404 | 403 |
if(!headers[0].equals("id"))
|
| 405 | 404 |
{
|
| 406 |
System.out.println("Error: The first column name in the header line of the metadata file '$csvfile' must be 'id' and found '"+headers[0]+"'");
|
|
| 407 |
return false; |
|
| 405 |
System.out.println("Error: The first column name in the header line of the metadata file '"+csvfile+"' must be 'id' and found '"+headers[0]+"' column separator='"+separator+"' and text separator='"+txtseparator+"'");
|
|
| 406 |
writer.close(); |
|
| 407 |
output.close(); |
|
| 408 |
if (!separator.equals("\t")) {
|
|
| 409 |
System.out.println("\tTrying with separators: column='\t' and text=''...");
|
|
| 410 |
return convertCsvToXml(csvfile, xmlFile, encoding, "\t", "", nbheaderline); |
|
| 411 |
} |
|
| 408 | 412 |
} |
| 409 | 413 |
|
| 410 | 414 |
//check for double columns |
Formats disponibles : Unified diff