Révision 2998
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImporter.groovy (revision 2998) | ||
|---|---|---|
| 139 | 139 |
|
| 140 | 140 |
isSuccessFul = filesToProcess.size() > 0 |
| 141 | 141 |
|
| 142 |
String cleanDirectories = project.getCleanAfterBuild(); |
|
| 143 |
if ("true".equals(cleanDirectories)) {
|
|
| 142 |
if (project.getCleanAfterBuild()) {
|
|
| 144 | 143 |
new File(module.getBinaryDirectory(), "tokenized").deleteDir() |
| 145 | 144 |
new File(module.getBinaryDirectory(), "src").deleteDir() |
| 146 | 145 |
new File(module.getBinaryDirectory(), "split").deleteDir() |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompiler.groovy (revision 2998) | ||
|---|---|---|
| 93 | 93 |
if (!doCWBEncodeStep()) return; |
| 94 | 94 |
if (!doCWBMakeAllStep()) return; |
| 95 | 95 |
|
| 96 |
if (module.getProject().getCleanAfterBuild() && !module.getProject().getDoUpdate()) {
|
|
| 96 |
if (module.getProject().getCleanAfterBuild() |
|
| 97 |
&& !module.getProject().getDoUpdate()) { // for optimization purpose, don't clean the CQP files
|
|
| 97 | 98 |
new File(module.getBinaryDirectory(), "cqp").deleteDir() |
| 98 | 99 |
} |
| 99 | 100 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/TTAnnotater.groovy (revision 2998) | ||
|---|---|---|
| 45 | 45 |
|
| 46 | 46 |
if (cleanDirectories) {
|
| 47 | 47 |
new File(module.getBinaryDirectory(), "treetagger").deleteDir() |
| 48 |
new File(module.getBinaryDirectory(), "ptreetagger").deleteDir() |
|
| 48 | 49 |
new File(module.getBinaryDirectory(), "annotations").deleteDir() |
| 49 | 50 |
} |
| 50 | 51 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xmltxm/compiler.groovy (revision 2998) | ||
|---|---|---|
| 472 | 472 |
ex.printStackTrace(); |
| 473 | 473 |
return false; |
| 474 | 474 |
} |
| 475 |
|
|
| 476 |
if (project.getCleanAfterBuild()) {
|
|
| 477 |
new File(binDir, "cqp").deleteDir() |
|
| 478 |
} |
|
| 475 | 479 |
|
| 476 | 480 |
return true; |
| 477 | 481 |
} |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/discours/compiler.groovy (revision 2998) | ||
|---|---|---|
| 245 | 245 |
|
| 246 | 246 |
} catch (Exception ex) { System.err.println(ex); return false;}
|
| 247 | 247 |
|
| 248 |
if (project.getCleanAfterBuild()) {
|
|
| 249 |
new File(binDir, "cqp").deleteDir() |
|
| 250 |
} |
|
| 251 |
|
|
| 248 | 252 |
return true; |
| 249 | 253 |
} |
| 250 | 254 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/discours/importer.groovy (revision 2998) | ||
|---|---|---|
| 70 | 70 |
* @param basename the basename |
| 71 | 71 |
* @return true, if successful |
| 72 | 72 |
*/ |
| 73 |
public boolean run(File srcDir, File binDir, File txmDir, String encoding, String basename) |
|
| 73 |
public boolean run(File srcDir, File binDir, File txmDir, String encoding, String basename, def project)
|
|
| 74 | 74 |
{
|
| 75 | 75 |
String rootDir = srcDir.getAbsolutePath()+"/" |
| 76 | 76 |
// scanning directory brut/*.cnr |
| ... | ... | |
| 262 | 262 |
} else print "." |
| 263 | 263 |
} |
| 264 | 264 |
println "" |
| 265 |
|
|
| 266 |
if (project.getCleanAfterBuild()) {
|
|
| 267 |
new File(project.getProjectDirectory(), "tokenized").deleteDir() |
|
| 268 |
new File(project.getProjectDirectory(), "ptokenized").deleteDir() |
|
| 269 |
new File(project.getProjectDirectory(), "stokenized").deleteDir() |
|
| 270 |
new File(project.getProjectDirectory(), "src").deleteDir() |
|
| 271 |
new File(project.getProjectDirectory(), "split").deleteDir() |
|
| 272 |
} |
|
| 265 | 273 |
return true; |
| 266 | 274 |
} |
| 267 | 275 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/discours/discoursLoader.groovy (revision 2998) | ||
|---|---|---|
| 75 | 75 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
| 76 | 76 |
println "-- IMPORTER - Reading source files" |
| 77 | 77 |
def imp = new importer(); |
| 78 |
if (!imp.run(srcDir, binDir, txmDir, encoding, basename)) {
|
|
| 78 |
if (!imp.run(srcDir, binDir, txmDir, encoding, basename, project)) {
|
|
| 79 | 79 |
println "import process stopped"; |
| 80 | 80 |
return; |
| 81 | 81 |
} |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/frantext/frantextLoader.groovy (revision 2998) | ||
|---|---|---|
| 159 | 159 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
|
| 160 | 160 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 161 | 161 |
annotate_status = true; |
| 162 |
if (project.getCleanAfterBuild()) {
|
|
| 163 |
new File(binDir, "treetagger").deleteDir() |
|
| 164 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 165 |
new File(binDir, "annotations").deleteDir() |
|
| 166 |
} |
|
| 162 | 167 |
} |
| 163 | 168 |
} |
| 164 | 169 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/xmlLoader.groovy (revision 2998) | ||
|---|---|---|
| 198 | 198 |
|
| 199 | 199 |
imp.doTokenize(doTokenizeStep) // change this, to not tokenize xml |
| 200 | 200 |
imp.setStopIfMalformed(stopIfMalformed) |
| 201 |
if (!imp.run( srcDir, binDir, txmDir, basename, ignoredElements, lang)) {
|
|
| 201 |
if (!imp.run( srcDir, binDir, txmDir, basename, ignoredElements, lang, project)) {
|
|
| 202 | 202 |
println "import process stopped" |
| 203 | 203 |
return |
| 204 | 204 |
} |
| ... | ... | |
| 240 | 240 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
|
| 241 | 241 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 242 | 242 |
annotationSuccess = true |
| 243 |
if (project.getCleanAfterBuild()) {
|
|
| 244 |
new File(binDir, "treetagger").deleteDir() |
|
| 245 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 246 |
new File(binDir, "annotations").deleteDir() |
|
| 247 |
} |
|
| 243 | 248 |
} |
| 244 | 249 |
} |
| 245 | 250 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/compiler.groovy (revision 2998) | ||
|---|---|---|
| 716 | 716 |
cwbMa.run(corpusname, outDir + "/registry"); |
| 717 | 717 |
} catch (Exception ex) {System.out.println(ex); return false;}
|
| 718 | 718 |
|
| 719 |
if (project.getCleanAfterBuild()) {
|
|
| 720 |
new File(binDir, "cqp").deleteDir() |
|
| 721 |
} |
|
| 722 |
|
|
| 719 | 723 |
return true; |
| 720 | 724 |
} |
| 721 | 725 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/importer.groovy (revision 2998) | ||
|---|---|---|
| 82 | 82 |
* @param basename the basename |
| 83 | 83 |
* @return true, if successful |
| 84 | 84 |
*/ |
| 85 |
public boolean run(File srcDir, File binDir, File txmDir, String basename, String ignoredElements, String lang) |
|
| 85 |
public boolean run(File srcDir, File binDir, File txmDir, String basename, String ignoredElements, String lang, def project)
|
|
| 86 | 86 |
{
|
| 87 | 87 |
new File(binDir,"tokenized").deleteDir() |
| 88 | 88 |
new File(binDir,"tokenized").mkdir() |
| ... | ... | |
| 224 | 224 |
cpb.done() |
| 225 | 225 |
okfiles = txmDir.listFiles() |
| 226 | 226 |
|
| 227 |
if (project.getCleanAfterBuild()) {
|
|
| 228 |
new File(binDir, "tokenized").deleteDir() |
|
| 229 |
new File(binDir, "ptokenized").deleteDir() |
|
| 230 |
new File(binDir, "stokenized").deleteDir() |
|
| 231 |
new File(binDir, "src").deleteDir() |
|
| 232 |
new File(binDir, "split").deleteDir() |
|
| 233 |
} |
|
| 234 |
|
|
| 227 | 235 |
return okfiles != null && okfiles.size() > 0; |
| 228 | 236 |
} |
| 229 | 237 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/importer.groovy (revision 2998) | ||
|---|---|---|
| 64 | 64 |
*/ |
| 65 | 65 |
class importer {
|
| 66 | 66 |
|
| 67 |
public boolean run(File srcDir, File binDir, File txmDir, String basename, Properties metadataXPath) |
|
| 67 |
public boolean run(File srcDir, File binDir, File txmDir, String basename, Properties metadataXPath, def project)
|
|
| 68 | 68 |
{
|
| 69 | 69 |
new File(binDir, "ptokenized").deleteDir(); |
| 70 | 70 |
new File(binDir, "ptokenized").mkdir(); |
| ... | ... | |
| 253 | 253 |
} |
| 254 | 254 |
} |
| 255 | 255 |
println("")
|
| 256 |
|
|
| 257 |
if (project.getCleanAfterBuild()) {
|
|
| 258 |
new File(binDir, "tokenized").deleteDir() |
|
| 259 |
new File(binDir, "stokenized").deleteDir() |
|
| 260 |
new File(binDir, "headers").deleteDir() |
|
| 261 |
} |
|
| 256 | 262 |
return true; |
| 257 | 263 |
} |
| 258 | 264 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/compiler.groovy (revision 2998) | ||
|---|---|---|
| 960 | 960 |
} |
| 961 | 961 |
cwbMa.run(corpusname, outDir + "/registry"); |
| 962 | 962 |
} catch (Exception ex) {System.out.println(ex);return false;}
|
| 963 |
|
|
| 963 |
|
|
| 964 |
if (project.getCleanAfterBuild()) {
|
|
| 965 |
new File(binDir, "cqp").deleteDir() |
|
| 966 |
} |
|
| 967 |
|
|
| 964 | 968 |
return true; |
| 965 | 969 |
} |
| 966 | 970 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/bfmLoader.groovy (revision 2998) | ||
|---|---|---|
| 125 | 125 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
| 126 | 126 |
println "-- IMPORTER - Reading source files" |
| 127 | 127 |
def imp = new importer() |
| 128 |
if (!imp.run(srcDir, binDir, txmDir, basename, metadataXPath)) {
|
|
| 128 |
if (!imp.run(srcDir, binDir, txmDir, basename, metadataXPath, project)) {
|
|
| 129 | 129 |
println "import process stopped"; |
| 130 | 130 |
return; |
| 131 | 131 |
} |
| ... | ... | |
| 139 | 139 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
|
| 140 | 140 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 141 | 141 |
annotate_status = true; |
| 142 |
|
|
| 143 |
if (project.getCleanAfterBuild()) {
|
|
| 144 |
new File(binDir, "treetagger").deleteDir() |
|
| 145 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 146 |
new File(binDir, "annotations").deleteDir() |
|
| 147 |
} |
|
| 142 | 148 |
} |
| 143 | 149 |
} |
| 144 | 150 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/doc/docLoader.groovy (revision 2998) | ||
|---|---|---|
| 217 | 217 |
def imp = new importer(); |
| 218 | 218 |
imp.doValidation(true) // change this to not validate xml |
| 219 | 219 |
imp.doTokenize(true) // change this, to not tokenize xml |
| 220 |
if (!imp.run(srcDir, binDir, txmDir, basename, "", lang)) {
|
|
| 220 |
if (!imp.run(srcDir, binDir, txmDir, basename, "", lang, project)) {
|
|
| 221 | 221 |
println "import process stopped"; |
| 222 | 222 |
return; |
| 223 | 223 |
} |
| ... | ... | |
| 260 | 260 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
|
| 261 | 261 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 262 | 262 |
annotationSuccess = true; |
| 263 |
if (project.getCleanAfterBuild()) {
|
|
| 264 |
new File(binDir, "treetagger").deleteDir() |
|
| 265 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 266 |
new File(binDir, "annotations").deleteDir() |
|
| 267 |
} |
|
| 263 | 268 |
} |
| 264 | 269 |
} |
| 265 | 270 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/tmxLoader.groovy (revision 2998) | ||
|---|---|---|
| 81 | 81 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
| 82 | 82 |
if (MONITOR != null) MONITOR.worked(20, "IMPORTER - Reading source files"); |
| 83 | 83 |
def imp = new importer() |
| 84 |
imp.run(srcDir, binDir, txmDir, textLangs, langGroups); |
|
| 84 |
imp.run(srcDir, binDir, txmDir, textLangs, langGroups, project);
|
|
| 85 | 85 |
def corpusIDS = imp.getCorpusIDS() |
| 86 | 86 |
|
| 87 | 87 |
if (MONITOR != null) MONITOR.worked(20, "ANNOTATE - Running NLP tools"); |
| ... | ... | |
| 90 | 90 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
|
| 91 | 91 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 92 | 92 |
annotationSuccess = true; |
| 93 |
if (project.getCleanAfterBuild()) {
|
|
| 94 |
new File(binDir, "treetagger").deleteDir() |
|
| 95 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 96 |
new File(binDir, "annotations").deleteDir() |
|
| 97 |
} |
|
| 93 | 98 |
} |
| 94 | 99 |
} |
| 95 | 100 |
println "langs : "+textLangs |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/importer.groovy (revision 2998) | ||
|---|---|---|
| 67 | 67 |
* @param rootDirFile the root dir file |
| 68 | 68 |
* @param base the base |
| 69 | 69 |
*/ |
| 70 |
public static void run(File srcDir, File binDir, File txmDir, HashMap<String, String> langs, HashMap<String, String> originalTexts) |
|
| 70 |
public static void run(File srcDir, File binDir, File txmDir, HashMap<String, String> langs, HashMap<String, String> originalTexts, def project)
|
|
| 71 | 71 |
{
|
| 72 | 72 |
String filename = binDir.getName(); |
| 73 | 73 |
|
| ... | ... | |
| 142 | 142 |
} |
| 143 | 143 |
} |
| 144 | 144 |
println "" |
| 145 |
|
|
| 146 |
if (project.getCleanAfterBuild()) {
|
|
| 147 |
new File(project.getProjectDirectory(), "tokenized").deleteDir() |
|
| 148 |
new File(project.getProjectDirectory(), "ptokenized").deleteDir() |
|
| 149 |
new File(project.getProjectDirectory(), "stokenized").deleteDir() |
|
| 150 |
new File(project.getProjectDirectory(), "src").deleteDir() |
|
| 151 |
new File(project.getProjectDirectory(), "split").deleteDir() |
|
| 152 |
} |
|
| 145 | 153 |
} |
| 146 | 154 |
|
| 147 | 155 |
public def getCorpusIDS() {
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/compiler.groovy (revision 2998) | ||
|---|---|---|
| 640 | 640 |
} |
| 641 | 641 |
} |
| 642 | 642 |
} |
| 643 |
|
|
| 644 |
if (project.getCleanAfterBuild()) {
|
|
| 645 |
new File(binDir, "cqp").deleteDir() |
|
| 646 |
} |
|
| 643 | 647 |
|
| 644 | 648 |
return true; |
| 645 | 649 |
} |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivaLoader.groovy (revision 2998) | ||
|---|---|---|
| 178 | 178 |
imp.doValidation(true) // change this to not validate xml |
| 179 | 179 |
imp.doTokenize(true) // change this, to not tokenize xml |
| 180 | 180 |
imp.setStopIfMalformed(stopIfMalformed); |
| 181 |
if (!imp.run( srcDir, binDir, txmDir, basename, null, lang)) {
|
|
| 181 |
if (!imp.run( srcDir, binDir, txmDir, basename, null, lang, project)) {
|
|
| 182 | 182 |
println "import process stopped"; |
| 183 | 183 |
return; |
| 184 | 184 |
} |
| ... | ... | |
| 218 | 218 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
|
| 219 | 219 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 220 | 220 |
annotationSuccess = true; |
| 221 |
if (project.getCleanAfterBuild()) {
|
|
| 222 |
new File(binDir, "treetagger").deleteDir() |
|
| 223 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 224 |
new File(binDir, "annotations").deleteDir() |
|
| 225 |
} |
|
| 221 | 226 |
} |
| 222 | 227 |
} |
| 223 | 228 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/importer.groovy (revision 2998) | ||
|---|---|---|
| 84 | 84 |
|
| 85 | 85 |
String lang; // language used by the tokenizer |
| 86 | 86 |
|
| 87 |
Project corpusProject;
|
|
| 87 |
Project project;
|
|
| 88 | 88 |
|
| 89 | 89 |
/** |
| 90 | 90 |
* Instantiates a new importer. |
| ... | ... | |
| 93 | 93 |
* @param outdir the outdir |
| 94 | 94 |
* @param metadatas the metadatas |
| 95 | 95 |
*/ |
| 96 |
public importer(ArrayList<File> trsfiles, File binDir, File txmDir, Metadatas metadatas, lang, Project corpusProject) {
|
|
| 96 |
public importer(ArrayList<File> trsfiles, File binDir, File txmDir, Metadatas metadatas, lang, Project project) {
|
|
| 97 | 97 |
this.trsfiles = trsfiles; |
| 98 | 98 |
this.txmDir = txmDir; |
| 99 | 99 |
this.binDir = binDir; |
| 100 | 100 |
this.metadatas = metadatas; |
| 101 | 101 |
this.lang = lang; |
| 102 |
this.corpusProject = corpusProject;
|
|
| 102 |
this.project = project;
|
|
| 103 | 103 |
} |
| 104 | 104 |
|
| 105 | 105 |
/** |
| ... | ... | |
| 247 | 247 |
} |
| 248 | 248 |
} |
| 249 | 249 |
|
| 250 |
String cleanDirectories = corpusProject.getCleanAfterBuild(); |
|
| 251 |
if ("true".equals(cleanDirectories)) {
|
|
| 252 |
new File(corpusProject.getProjectDirectory(), "tokenized").deleteDir() |
|
| 253 |
new File(corpusProject.getProjectDirectory(), "src").deleteDir() |
|
| 254 |
new File(corpusProject.getProjectDirectory(), "split").deleteDir() |
|
| 250 |
if (project.getCleanAfterBuild()) {
|
|
| 251 |
new File(project.getProjectDirectory(), "tokenized").deleteDir() |
|
| 252 |
new File(project.getProjectDirectory(), "src").deleteDir() |
|
| 253 |
new File(project.getProjectDirectory(), "split").deleteDir() |
|
| 255 | 254 |
} |
| 256 | 255 |
|
| 257 | 256 |
cpb.done() |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 2998) | ||
|---|---|---|
| 67 | 67 |
String userDir = System.getProperty("user.home");
|
| 68 | 68 |
|
| 69 | 69 |
def MONITOR; |
| 70 |
Project corpusProject;
|
|
| 70 |
Project project;
|
|
| 71 | 71 |
|
| 72 |
try {corpusProject=projectBinding;MONITOR=monitor} catch (Exception)
|
|
| 72 |
try {project=projectBinding;MONITOR=monitor} catch (Exception)
|
|
| 73 | 73 |
{ }
|
| 74 |
if (corpusProject == null) { println "no project set. Aborting"; return; }
|
|
| 74 |
if (project == null) { println "no project set. Aborting"; return; }
|
|
| 75 | 75 |
|
| 76 |
String corpusname = corpusProject.getName();
|
|
| 76 |
String corpusname = project.getName();
|
|
| 77 | 77 |
String basename = corpusname |
| 78 |
String rootDir = corpusProject.getSrcdir();
|
|
| 79 |
String lang = corpusProject.getLang()
|
|
| 78 |
String rootDir = project.getSrcdir();
|
|
| 79 |
String lang = project.getLang()
|
|
| 80 | 80 |
String model = lang |
| 81 |
String encoding = corpusProject.getEncoding()
|
|
| 82 |
boolean annotate = corpusProject.getAnnotate()
|
|
| 83 |
String xsl = corpusProject.getFrontXSL();
|
|
| 84 |
def xslParams = corpusProject.getXsltParameters();
|
|
| 85 |
int wordsPerPage = corpusProject.getEditionDefinition("default").getWordsPerPage()
|
|
| 86 |
String page_element = corpusProject.getEditionDefinition("default").getPageElement()
|
|
| 87 |
boolean build_edition = corpusProject.getEditionDefinition("default").getBuildEdition()
|
|
| 88 |
boolean update = corpusProject.getDoUpdate()
|
|
| 81 |
String encoding = project.getEncoding()
|
|
| 82 |
boolean annotate = project.getAnnotate()
|
|
| 83 |
String xsl = project.getFrontXSL();
|
|
| 84 |
def xslParams = project.getXsltParameters();
|
|
| 85 |
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage()
|
|
| 86 |
String page_element = project.getEditionDefinition("default").getPageElement()
|
|
| 87 |
boolean build_edition = project.getEditionDefinition("default").getBuildEdition()
|
|
| 88 |
boolean update = project.getDoUpdate()
|
|
| 89 | 89 |
|
| 90 | 90 |
File srcDir = new File(rootDir); |
| 91 |
File binDir = corpusProject.getProjectDirectory();
|
|
| 91 |
File binDir = project.getProjectDirectory();
|
|
| 92 | 92 |
binDir.mkdirs(); |
| 93 | 93 |
if (!binDir.exists()) {
|
| 94 | 94 |
println "Could not create binDir "+binDir |
| ... | ... | |
| 195 | 195 |
if (MONITOR != null) MONITOR.worked(1, "IMPORTER") |
| 196 | 196 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
| 197 | 197 |
println "-- IMPORTER" |
| 198 |
def imp = new importer(trsfiles, binDir, txmDir, metadatas, lang, corpusProject) //put result in the txm folder of binDir
|
|
| 198 |
def imp = new importer(trsfiles, binDir, txmDir, metadatas, lang, project) //put result in the txm folder of binDir
|
|
| 199 | 199 |
if (!imp.run()) {
|
| 200 | 200 |
println "Failed to prepare files - Aborting"; |
| 201 | 201 |
return; |
| ... | ... | |
| 243 | 243 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 244 | 244 |
annotationSuccess = true; |
| 245 | 245 |
|
| 246 |
String cleanDirectories = corpusProject.getCleanAfterBuild(); |
|
| 247 |
if ("true".equals(cleanDirectories)) {
|
|
| 248 |
new File(corpusProject.getProjectDirectory(), "ptreetagger").deleteDir() |
|
| 249 |
new File(corpusProject.getProjectDirectory(), "treetagger").deleteDir() |
|
| 250 |
new File(corpusProject.getProjectDirectory(), "annotations").deleteDir() |
|
| 246 |
if (project.getCleanAfterBuild()) {
|
|
| 247 |
new File(binDir, "treetagger").deleteDir() |
|
| 248 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 249 |
new File(binDir, "annotations").deleteDir() |
|
| 251 | 250 |
} |
| 252 | 251 |
} |
| 253 | 252 |
} |
| ... | ... | |
| 284 | 283 |
if(debug) comp.setDebug(); |
| 285 | 284 |
comp.removeInterviewers(removeInterviewer); |
| 286 | 285 |
comp.setIgnoreTranscriberMetadata(ignoreTranscriberMetadata); |
| 287 |
if (!comp.run(corpusProject, xmltxmFiles, corpusname, "default", binDir)) {
|
|
| 286 |
if (!comp.run(project, xmltxmFiles, corpusname, "default", binDir)) {
|
|
| 288 | 287 |
println "Failed to compile files"; |
| 289 | 288 |
return; |
| 290 | 289 |
} |
| ... | ... | |
| 312 | 311 |
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang); |
| 313 | 312 |
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang); |
| 314 | 313 |
|
| 315 |
Text t = corpusProject.getText(txtname)
|
|
| 314 |
Text t = project.getText(txtname)
|
|
| 316 | 315 |
if (t == null) {
|
| 317 |
t = new Text(corpusProject);
|
|
| 316 |
t = new Text(project);
|
|
| 318 | 317 |
t.setName(txtname); |
| 319 | 318 |
} |
| 320 | 319 |
t.setSourceFile(txmFile) |
| ... | ... | |
| 356 | 355 |
String txtname = txmFile.getName(); |
| 357 | 356 |
int i = txtname.lastIndexOf(".");
|
| 358 | 357 |
if(i > 0) txtname = txtname.substring(0, i); |
| 359 |
File mediaFile = new File(corpusProject.getSrcdir(), txtname + ".mp3")
|
|
| 360 |
if (!mediaFile.exists()) mediaFile = new File(corpusProject.getSrcdir(), txtname + ".wav")
|
|
| 361 |
if (!mediaFile.exists()) mediaFile = new File(corpusProject.getSrcdir(), txtname + ".mp4")
|
|
| 362 |
if (!mediaFile.exists()) mediaFile = new File(corpusProject.getSrcdir(), txtname + ".avi")
|
|
| 358 |
File mediaFile = new File(project.getSrcdir(), txtname + ".mp3")
|
|
| 359 |
if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".wav")
|
|
| 360 |
if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".mp4")
|
|
| 361 |
if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".avi")
|
|
| 363 | 362 |
|
| 364 | 363 |
if (mediaFile.exists()) {
|
| 365 | 364 |
File copy = new File(binDir, "media/"+mediaFile.getName()) |
| ... | ... | |
| 374 | 373 |
|
| 375 | 374 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
| 376 | 375 |
if (MONITOR != null) MONITOR.worked(20, "FINALIZING") |
| 377 |
readyToLoad = corpusProject.save(); |
|
| 376 |
readyToLoad = project.save(); |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 2998) | ||
|---|---|---|
| 117 | 117 |
* @param outdir the outdir |
| 118 | 118 |
* @return true, if successful |
| 119 | 119 |
*/ |
| 120 |
public boolean run(Project corpusProject, List<File> xmlfiles, String corpusname, String projectname, File binDir) {
|
|
| 120 |
public boolean run(Project project, List<File> xmlfiles, String corpusname, String projectname, File binDir) {
|
|
| 121 | 121 |
|
| 122 | 122 |
//println "run compiler with $xmlfiles, $basename and $outdir" |
| 123 | 123 |
this.outdir = binDir; |
| ... | ... | |
| 129 | 129 |
|
| 130 | 130 |
sectionAttrs = new HashSet<String>() // reset section attributs set |
| 131 | 131 |
|
| 132 |
CorpusBuild corpus = corpusProject.getCorpusBuild(corpusProject.getName(), MainCorpus.class);
|
|
| 132 |
CorpusBuild corpus = project.getCorpusBuild(project.getName(), MainCorpus.class);
|
|
| 133 | 133 |
if (corpus != null) {
|
| 134 |
if (corpusProject.getDoUpdate()) {
|
|
| 134 |
if (project.getDoUpdate()) {
|
|
| 135 | 135 |
corpus.clean(); // remove old files |
| 136 | 136 |
} else {
|
| 137 | 137 |
corpus.delete(); // remove old files and TXMResult children |
| 138 | 138 |
} |
| 139 | 139 |
} else {
|
| 140 |
corpus = new MainCorpus(corpusProject);
|
|
| 141 |
corpus.setID(corpusProject.getName());
|
|
| 142 |
corpus.setName(corpusProject.getName());
|
|
| 140 |
corpus = new MainCorpus(project);
|
|
| 141 |
corpus.setID(project.getName());
|
|
| 142 |
corpus.setName(project.getName());
|
|
| 143 | 143 |
} |
| 144 | 144 |
corpus.setDescription("Built with the XML-TRS import module");
|
| 145 | 145 |
|
| ... | ... | |
| 236 | 236 |
|
| 237 | 237 |
} catch (Exception ex) {System.out.println(ex); return false;}
|
| 238 | 238 |
|
| 239 |
String cleanDirectories = corpusProject.getCleanAfterBuild(); |
|
| 240 |
if ("true".equals(cleanDirectories)) {
|
|
| 241 |
new File(corpusProject.getProjectDirectory(), "cqp").deleteDir() |
|
| 239 |
if (project.getCleanAfterBuild()) {
|
|
| 240 |
new File(project.getProjectDirectory(), "cqp").deleteDir() |
|
| 242 | 241 |
} |
| 243 | 242 |
|
| 244 | 243 |
return true; |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/TRSToTEI.groovy (revision 2998) | ||
|---|---|---|
| 206 | 206 |
writer.writeAttribute("time", formatedTime)
|
| 207 | 207 |
|
| 208 | 208 |
writer.writeAttribute("start", time)
|
| 209 |
writer.writeAttribute("end", parser.getAttributeValue(null, "endTime"))
|
|
| 209 |
writer.writeAttribute("end", ""+parser.getAttributeValue(null, "endTime"))
|
|
| 210 |
writer.writeAttribute("who", ""+parser.getAttributeValue(null, "speaker"))
|
|
| 210 | 211 |
|
| 211 |
writer.writeAttribute("who", parser.getAttributeValue(null, "speaker"))
|
|
| 212 |
|
|
| 213 | 212 |
for (int i = 0; i < parser.getAttributeCount(); i++) { // write other attributes if any
|
| 214 | 213 |
String v = parser.getAttributeLocalName(i); |
| 215 | 214 |
if (!("who".equals(v)) && !("overlap".equals(v)) && !("time".equals(v)) && !("speaker".equals(v)) && !("endTime".equals(v)) && !("startTime".equals(v))) {
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/txtLoader.groovy (revision 2998) | ||
|---|---|---|
| 98 | 98 |
println "-- IMPORTER - Reading source files with extension "+suffixes |
| 99 | 99 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
| 100 | 100 |
if (MONITOR != null) MONITOR.worked(20, "IMPORTER - Reading source files with extension "+suffixes) |
| 101 |
if (!new importer().run(srcDir, binDir, txmDir,encoding, suffixes, basename, lang)) {
|
|
| 101 |
if (!new importer().run(srcDir, binDir, txmDir,encoding, suffixes, basename, lang, project)) {
|
|
| 102 | 102 |
println "Import process stopped"; |
| 103 | 103 |
return; |
| 104 | 104 |
} |
| ... | ... | |
| 136 | 136 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
|
| 137 | 137 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 138 | 138 |
annotationSuccess = true; |
| 139 |
if (project.getCleanAfterBuild()) {
|
|
| 140 |
new File(binDir, "treetagger").deleteDir() |
|
| 141 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 142 |
new File(binDir, "annotations").deleteDir() |
|
| 143 |
} |
|
| 139 | 144 |
} |
| 140 | 145 |
} |
| 141 | 146 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/compiler.groovy (revision 2998) | ||
|---|---|---|
| 267 | 267 |
cwbMa.run(corpusname, outDir + "/registry"); |
| 268 | 268 |
} catch (Exception ex) {System.out.println("CWB error: "+ex); return false;}
|
| 269 | 269 |
|
| 270 |
if (project.getCleanAfterBuild()) {
|
|
| 271 |
new File(binDir, "cqp").deleteDir() |
|
| 272 |
} |
|
| 273 |
|
|
| 270 | 274 |
return true; |
| 271 | 275 |
} |
| 272 | 276 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/importer.groovy (revision 2998) | ||
|---|---|---|
| 56 | 56 |
* @param basename the basename |
| 57 | 57 |
* @return true, if successful |
| 58 | 58 |
*/ |
| 59 |
public static boolean run(File srcDir, File binDir, File txmDir, String encoding, List<String> suffixes, String basename, String lang) {
|
|
| 59 |
public static boolean run(File srcDir, File binDir, File txmDir, String encoding, List<String> suffixes, String basename, String lang, def project) {
|
|
| 60 | 60 |
|
| 61 | 61 |
File stokenizedDir = new File(binDir,"stokenized"); |
| 62 | 62 |
stokenizedDir.deleteDir(); |
| ... | ... | |
| 297 | 297 |
files = txmDir.listFiles() |
| 298 | 298 |
if (files == null || files.size() == 0) return false |
| 299 | 299 |
|
| 300 |
if (project.getCleanAfterBuild()) {
|
|
| 301 |
new File(project.getProjectDirectory(), "tokenized").deleteDir() |
|
| 302 |
new File(project.getProjectDirectory(), "ptokenized").deleteDir() |
|
| 303 |
new File(project.getProjectDirectory(), "stokenized").deleteDir() |
|
| 304 |
new File(project.getProjectDirectory(), "src").deleteDir() |
|
| 305 |
new File(project.getProjectDirectory(), "split").deleteDir() |
|
| 306 |
} |
|
| 307 |
|
|
| 300 | 308 |
return true; |
| 301 | 309 |
} |
| 302 | 310 |
} |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/importer.groovy (revision 2998) | ||
|---|---|---|
| 68 | 68 |
* @param basename the basename |
| 69 | 69 |
* @return true, if successful |
| 70 | 70 |
*/ |
| 71 |
public static boolean run(File rootDirFile, File binDir, File txmDir, String encoding, String basename, String lang) |
|
| 71 |
public static boolean run(File rootDirFile, File binDir, File txmDir, String encoding, String basename, String lang, def project)
|
|
| 72 | 72 |
{
|
| 73 | 73 |
if (rootDirFile.listFiles() == null || rootDirFile.listFiles().size() == 0) {
|
| 74 | 74 |
println "Error: no file to process in "+rootDirFile; |
| ... | ... | |
| 184 | 184 |
} |
| 185 | 185 |
} |
| 186 | 186 |
cpb.done() |
| 187 |
|
|
| 188 |
if (project.getCleanAfterBuild()) {
|
|
| 189 |
new File(binDir, "tokenized").deleteDir() |
|
| 190 |
new File(binDir, "stokenized").deleteDir() |
|
| 191 |
new File(binDir, "src").deleteDir() |
|
| 192 |
new File(binDir, "split").deleteDir() |
|
| 193 |
} |
|
| 187 | 194 |
return true; |
| 188 | 195 |
} |
| 189 | 196 |
} |
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/alcesteLoader.groovy (revision 2998) | ||
|---|---|---|
| 78 | 78 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
| 79 | 79 |
println "-- IMPORTER - Reading source files" |
| 80 | 80 |
|
| 81 |
if (!(new importer().run(srcDir, binDir, txmDir, encoding, basename, lang))) {
|
|
| 81 |
if (!(new importer().run(srcDir, binDir, txmDir, encoding, basename, lang, project))) {
|
|
| 82 | 82 |
println "import process stopped"; |
| 83 | 83 |
return; |
| 84 | 84 |
} |
| ... | ... | |
| 94 | 94 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 95 | 95 |
annotationSuccess = true; |
| 96 | 96 |
} |
| 97 |
|
|
| 98 |
if (project.getCleanAfterBuild()) {
|
|
| 99 |
new File(binDir, "treetagger").deleteDir() |
|
| 100 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 101 |
new File(binDir, "annotations").deleteDir() |
|
| 102 |
} |
|
| 97 | 103 |
} |
| 98 | 104 |
|
| 99 | 105 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/compiler.groovy (revision 2998) | ||
|---|---|---|
| 246 | 246 |
|
| 247 | 247 |
} catch (Exception ex) {System.out.println(ex); return false;}
|
| 248 | 248 |
|
| 249 |
if (project.getCleanAfterBuild()) {
|
|
| 250 |
new File(binDir, "cqp").deleteDir() |
|
| 251 |
} |
|
| 252 |
|
|
| 249 | 253 |
return true; |
| 250 | 254 |
} |
| 251 | 255 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/hyperbaseLoader.groovy (revision 2998) | ||
|---|---|---|
| 91 | 91 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger")
|
| 92 | 92 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) {
|
| 93 | 93 |
annotationSuccess = true; |
| 94 |
if (project.getCleanAfterBuild()) {
|
|
| 95 |
new File(binDir, "treetagger").deleteDir() |
|
| 96 |
new File(binDir, "ptreetagger").deleteDir() |
|
| 97 |
new File(binDir, "annotations").deleteDir() |
|
| 98 |
} |
|
| 94 | 99 |
} |
| 95 | 100 |
} |
| 96 | 101 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/compiler.groovy (revision 2998) | ||
|---|---|---|
| 248 | 248 |
|
| 249 | 249 |
} catch (Exception ex) {System.out.println(ex); return false;}
|
| 250 | 250 |
|
| 251 |
if (project.getCleanAfterBuild()) {
|
|
| 252 |
new File(binDir, "cqp").deleteDir() |
|
| 253 |
} |
|
| 254 |
|
|
| 251 | 255 |
return true; |
| 252 | 256 |
} |
| 253 | 257 |
|
| tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/cqp/compiler.groovy (revision 2998) | ||
|---|---|---|
| 186 | 186 |
cwbMa.run(corpusname, outDir + "/registry"); |
| 187 | 187 |
return true; |
| 188 | 188 |
} catch (Exception ex) {System.out.println(ex); return false;}
|
| 189 |
|
|
| 189 |
|
|
| 190 |
if (project.getCleanAfterBuild()) {
|
|
| 191 |
new File(binDir, "cqp").deleteDir() |
|
| 192 |
} |
|
| 193 |
|
|
| 190 | 194 |
return true; |
| 191 | 195 |
} |
| 192 | 196 |
|
Formats disponibles : Unified diff