Révision 2998
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImporter.groovy (revision 2998) | ||
---|---|---|
139 | 139 |
|
140 | 140 |
isSuccessFul = filesToProcess.size() > 0 |
141 | 141 |
|
142 |
String cleanDirectories = project.getCleanAfterBuild(); |
|
143 |
if ("true".equals(cleanDirectories)) { |
|
142 |
if (project.getCleanAfterBuild()) { |
|
144 | 143 |
new File(module.getBinaryDirectory(), "tokenized").deleteDir() |
145 | 144 |
new File(module.getBinaryDirectory(), "src").deleteDir() |
146 | 145 |
new File(module.getBinaryDirectory(), "split").deleteDir() |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompiler.groovy (revision 2998) | ||
---|---|---|
93 | 93 |
if (!doCWBEncodeStep()) return; |
94 | 94 |
if (!doCWBMakeAllStep()) return; |
95 | 95 |
|
96 |
if (module.getProject().getCleanAfterBuild() && !module.getProject().getDoUpdate()) { |
|
96 |
if (module.getProject().getCleanAfterBuild() |
|
97 |
&& !module.getProject().getDoUpdate()) { // for optimization purpose, don't clean the CQP files |
|
97 | 98 |
new File(module.getBinaryDirectory(), "cqp").deleteDir() |
98 | 99 |
} |
99 | 100 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/TTAnnotater.groovy (revision 2998) | ||
---|---|---|
45 | 45 |
|
46 | 46 |
if (cleanDirectories) { |
47 | 47 |
new File(module.getBinaryDirectory(), "treetagger").deleteDir() |
48 |
new File(module.getBinaryDirectory(), "ptreetagger").deleteDir() |
|
48 | 49 |
new File(module.getBinaryDirectory(), "annotations").deleteDir() |
49 | 50 |
} |
50 | 51 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xmltxm/compiler.groovy (revision 2998) | ||
---|---|---|
472 | 472 |
ex.printStackTrace(); |
473 | 473 |
return false; |
474 | 474 |
} |
475 |
|
|
476 |
if (project.getCleanAfterBuild()) { |
|
477 |
new File(binDir, "cqp").deleteDir() |
|
478 |
} |
|
475 | 479 |
|
476 | 480 |
return true; |
477 | 481 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/discours/compiler.groovy (revision 2998) | ||
---|---|---|
245 | 245 |
|
246 | 246 |
} catch (Exception ex) { System.err.println(ex); return false;} |
247 | 247 |
|
248 |
if (project.getCleanAfterBuild()) { |
|
249 |
new File(binDir, "cqp").deleteDir() |
|
250 |
} |
|
251 |
|
|
248 | 252 |
return true; |
249 | 253 |
} |
250 | 254 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/discours/importer.groovy (revision 2998) | ||
---|---|---|
70 | 70 |
* @param basename the basename |
71 | 71 |
* @return true, if successful |
72 | 72 |
*/ |
73 |
public boolean run(File srcDir, File binDir, File txmDir, String encoding, String basename) |
|
73 |
public boolean run(File srcDir, File binDir, File txmDir, String encoding, String basename, def project)
|
|
74 | 74 |
{ |
75 | 75 |
String rootDir = srcDir.getAbsolutePath()+"/" |
76 | 76 |
// scanning directory brut/*.cnr |
... | ... | |
262 | 262 |
} else print "." |
263 | 263 |
} |
264 | 264 |
println "" |
265 |
|
|
266 |
if (project.getCleanAfterBuild()) { |
|
267 |
new File(project.getProjectDirectory(), "tokenized").deleteDir() |
|
268 |
new File(project.getProjectDirectory(), "ptokenized").deleteDir() |
|
269 |
new File(project.getProjectDirectory(), "stokenized").deleteDir() |
|
270 |
new File(project.getProjectDirectory(), "src").deleteDir() |
|
271 |
new File(project.getProjectDirectory(), "split").deleteDir() |
|
272 |
} |
|
265 | 273 |
return true; |
266 | 274 |
} |
267 | 275 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/discours/discoursLoader.groovy (revision 2998) | ||
---|---|---|
75 | 75 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
76 | 76 |
println "-- IMPORTER - Reading source files" |
77 | 77 |
def imp = new importer(); |
78 |
if (!imp.run(srcDir, binDir, txmDir, encoding, basename)) { |
|
78 |
if (!imp.run(srcDir, binDir, txmDir, encoding, basename, project)) {
|
|
79 | 79 |
println "import process stopped"; |
80 | 80 |
return; |
81 | 81 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/frantext/frantextLoader.groovy (revision 2998) | ||
---|---|---|
159 | 159 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
160 | 160 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
161 | 161 |
annotate_status = true; |
162 |
if (project.getCleanAfterBuild()) { |
|
163 |
new File(binDir, "treetagger").deleteDir() |
|
164 |
new File(binDir, "ptreetagger").deleteDir() |
|
165 |
new File(binDir, "annotations").deleteDir() |
|
166 |
} |
|
162 | 167 |
} |
163 | 168 |
} |
164 | 169 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/xmlLoader.groovy (revision 2998) | ||
---|---|---|
198 | 198 |
|
199 | 199 |
imp.doTokenize(doTokenizeStep) // change this, to not tokenize xml |
200 | 200 |
imp.setStopIfMalformed(stopIfMalformed) |
201 |
if (!imp.run( srcDir, binDir, txmDir, basename, ignoredElements, lang)) { |
|
201 |
if (!imp.run( srcDir, binDir, txmDir, basename, ignoredElements, lang, project)) {
|
|
202 | 202 |
println "import process stopped" |
203 | 203 |
return |
204 | 204 |
} |
... | ... | |
240 | 240 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
241 | 241 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
242 | 242 |
annotationSuccess = true |
243 |
if (project.getCleanAfterBuild()) { |
|
244 |
new File(binDir, "treetagger").deleteDir() |
|
245 |
new File(binDir, "ptreetagger").deleteDir() |
|
246 |
new File(binDir, "annotations").deleteDir() |
|
247 |
} |
|
243 | 248 |
} |
244 | 249 |
} |
245 | 250 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/compiler.groovy (revision 2998) | ||
---|---|---|
716 | 716 |
cwbMa.run(corpusname, outDir + "/registry"); |
717 | 717 |
} catch (Exception ex) {System.out.println(ex); return false;} |
718 | 718 |
|
719 |
if (project.getCleanAfterBuild()) { |
|
720 |
new File(binDir, "cqp").deleteDir() |
|
721 |
} |
|
722 |
|
|
719 | 723 |
return true; |
720 | 724 |
} |
721 | 725 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/importer.groovy (revision 2998) | ||
---|---|---|
82 | 82 |
* @param basename the basename |
83 | 83 |
* @return true, if successful |
84 | 84 |
*/ |
85 |
public boolean run(File srcDir, File binDir, File txmDir, String basename, String ignoredElements, String lang) |
|
85 |
public boolean run(File srcDir, File binDir, File txmDir, String basename, String ignoredElements, String lang, def project)
|
|
86 | 86 |
{ |
87 | 87 |
new File(binDir,"tokenized").deleteDir() |
88 | 88 |
new File(binDir,"tokenized").mkdir() |
... | ... | |
224 | 224 |
cpb.done() |
225 | 225 |
okfiles = txmDir.listFiles() |
226 | 226 |
|
227 |
if (project.getCleanAfterBuild()) { |
|
228 |
new File(binDir, "tokenized").deleteDir() |
|
229 |
new File(binDir, "ptokenized").deleteDir() |
|
230 |
new File(binDir, "stokenized").deleteDir() |
|
231 |
new File(binDir, "src").deleteDir() |
|
232 |
new File(binDir, "split").deleteDir() |
|
233 |
} |
|
234 |
|
|
227 | 235 |
return okfiles != null && okfiles.size() > 0; |
228 | 236 |
} |
229 | 237 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/importer.groovy (revision 2998) | ||
---|---|---|
64 | 64 |
*/ |
65 | 65 |
class importer { |
66 | 66 |
|
67 |
public boolean run(File srcDir, File binDir, File txmDir, String basename, Properties metadataXPath) |
|
67 |
public boolean run(File srcDir, File binDir, File txmDir, String basename, Properties metadataXPath, def project)
|
|
68 | 68 |
{ |
69 | 69 |
new File(binDir, "ptokenized").deleteDir(); |
70 | 70 |
new File(binDir, "ptokenized").mkdir(); |
... | ... | |
253 | 253 |
} |
254 | 254 |
} |
255 | 255 |
println("") |
256 |
|
|
257 |
if (project.getCleanAfterBuild()) { |
|
258 |
new File(binDir, "tokenized").deleteDir() |
|
259 |
new File(binDir, "stokenized").deleteDir() |
|
260 |
new File(binDir, "headers").deleteDir() |
|
261 |
} |
|
256 | 262 |
return true; |
257 | 263 |
} |
258 | 264 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/compiler.groovy (revision 2998) | ||
---|---|---|
960 | 960 |
} |
961 | 961 |
cwbMa.run(corpusname, outDir + "/registry"); |
962 | 962 |
} catch (Exception ex) {System.out.println(ex);return false;} |
963 |
|
|
963 |
|
|
964 |
if (project.getCleanAfterBuild()) { |
|
965 |
new File(binDir, "cqp").deleteDir() |
|
966 |
} |
|
967 |
|
|
964 | 968 |
return true; |
965 | 969 |
} |
966 | 970 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/bfmLoader.groovy (revision 2998) | ||
---|---|---|
125 | 125 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
126 | 126 |
println "-- IMPORTER - Reading source files" |
127 | 127 |
def imp = new importer() |
128 |
if (!imp.run(srcDir, binDir, txmDir, basename, metadataXPath)) { |
|
128 |
if (!imp.run(srcDir, binDir, txmDir, basename, metadataXPath, project)) {
|
|
129 | 129 |
println "import process stopped"; |
130 | 130 |
return; |
131 | 131 |
} |
... | ... | |
139 | 139 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
140 | 140 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
141 | 141 |
annotate_status = true; |
142 |
|
|
143 |
if (project.getCleanAfterBuild()) { |
|
144 |
new File(binDir, "treetagger").deleteDir() |
|
145 |
new File(binDir, "ptreetagger").deleteDir() |
|
146 |
new File(binDir, "annotations").deleteDir() |
|
147 |
} |
|
142 | 148 |
} |
143 | 149 |
} |
144 | 150 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/doc/docLoader.groovy (revision 2998) | ||
---|---|---|
217 | 217 |
def imp = new importer(); |
218 | 218 |
imp.doValidation(true) // change this to not validate xml |
219 | 219 |
imp.doTokenize(true) // change this, to not tokenize xml |
220 |
if (!imp.run(srcDir, binDir, txmDir, basename, "", lang)) { |
|
220 |
if (!imp.run(srcDir, binDir, txmDir, basename, "", lang, project)) {
|
|
221 | 221 |
println "import process stopped"; |
222 | 222 |
return; |
223 | 223 |
} |
... | ... | |
260 | 260 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
261 | 261 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
262 | 262 |
annotationSuccess = true; |
263 |
if (project.getCleanAfterBuild()) { |
|
264 |
new File(binDir, "treetagger").deleteDir() |
|
265 |
new File(binDir, "ptreetagger").deleteDir() |
|
266 |
new File(binDir, "annotations").deleteDir() |
|
267 |
} |
|
263 | 268 |
} |
264 | 269 |
} |
265 | 270 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/tmxLoader.groovy (revision 2998) | ||
---|---|---|
81 | 81 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
82 | 82 |
if (MONITOR != null) MONITOR.worked(20, "IMPORTER - Reading source files"); |
83 | 83 |
def imp = new importer() |
84 |
imp.run(srcDir, binDir, txmDir, textLangs, langGroups); |
|
84 |
imp.run(srcDir, binDir, txmDir, textLangs, langGroups, project);
|
|
85 | 85 |
def corpusIDS = imp.getCorpusIDS() |
86 | 86 |
|
87 | 87 |
if (MONITOR != null) MONITOR.worked(20, "ANNOTATE - Running NLP tools"); |
... | ... | |
90 | 90 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
91 | 91 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
92 | 92 |
annotationSuccess = true; |
93 |
if (project.getCleanAfterBuild()) { |
|
94 |
new File(binDir, "treetagger").deleteDir() |
|
95 |
new File(binDir, "ptreetagger").deleteDir() |
|
96 |
new File(binDir, "annotations").deleteDir() |
|
97 |
} |
|
93 | 98 |
} |
94 | 99 |
} |
95 | 100 |
println "langs : "+textLangs |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/importer.groovy (revision 2998) | ||
---|---|---|
67 | 67 |
* @param rootDirFile the root dir file |
68 | 68 |
* @param base the base |
69 | 69 |
*/ |
70 |
public static void run(File srcDir, File binDir, File txmDir, HashMap<String, String> langs, HashMap<String, String> originalTexts) |
|
70 |
public static void run(File srcDir, File binDir, File txmDir, HashMap<String, String> langs, HashMap<String, String> originalTexts, def project)
|
|
71 | 71 |
{ |
72 | 72 |
String filename = binDir.getName(); |
73 | 73 |
|
... | ... | |
142 | 142 |
} |
143 | 143 |
} |
144 | 144 |
println "" |
145 |
|
|
146 |
if (project.getCleanAfterBuild()) { |
|
147 |
new File(project.getProjectDirectory(), "tokenized").deleteDir() |
|
148 |
new File(project.getProjectDirectory(), "ptokenized").deleteDir() |
|
149 |
new File(project.getProjectDirectory(), "stokenized").deleteDir() |
|
150 |
new File(project.getProjectDirectory(), "src").deleteDir() |
|
151 |
new File(project.getProjectDirectory(), "split").deleteDir() |
|
152 |
} |
|
145 | 153 |
} |
146 | 154 |
|
147 | 155 |
public def getCorpusIDS() { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/compiler.groovy (revision 2998) | ||
---|---|---|
640 | 640 |
} |
641 | 641 |
} |
642 | 642 |
} |
643 |
|
|
644 |
if (project.getCleanAfterBuild()) { |
|
645 |
new File(binDir, "cqp").deleteDir() |
|
646 |
} |
|
643 | 647 |
|
644 | 648 |
return true; |
645 | 649 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivaLoader.groovy (revision 2998) | ||
---|---|---|
178 | 178 |
imp.doValidation(true) // change this to not validate xml |
179 | 179 |
imp.doTokenize(true) // change this, to not tokenize xml |
180 | 180 |
imp.setStopIfMalformed(stopIfMalformed); |
181 |
if (!imp.run( srcDir, binDir, txmDir, basename, null, lang)) { |
|
181 |
if (!imp.run( srcDir, binDir, txmDir, basename, null, lang, project)) {
|
|
182 | 182 |
println "import process stopped"; |
183 | 183 |
return; |
184 | 184 |
} |
... | ... | |
218 | 218 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
219 | 219 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
220 | 220 |
annotationSuccess = true; |
221 |
if (project.getCleanAfterBuild()) { |
|
222 |
new File(binDir, "treetagger").deleteDir() |
|
223 |
new File(binDir, "ptreetagger").deleteDir() |
|
224 |
new File(binDir, "annotations").deleteDir() |
|
225 |
} |
|
221 | 226 |
} |
222 | 227 |
} |
223 | 228 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/importer.groovy (revision 2998) | ||
---|---|---|
84 | 84 |
|
85 | 85 |
String lang; // language used by the tokenizer |
86 | 86 |
|
87 |
Project corpusProject;
|
|
87 |
Project project;
|
|
88 | 88 |
|
89 | 89 |
/** |
90 | 90 |
* Instantiates a new importer. |
... | ... | |
93 | 93 |
* @param outdir the outdir |
94 | 94 |
* @param metadatas the metadatas |
95 | 95 |
*/ |
96 |
public importer(ArrayList<File> trsfiles, File binDir, File txmDir, Metadatas metadatas, lang, Project corpusProject) {
|
|
96 |
public importer(ArrayList<File> trsfiles, File binDir, File txmDir, Metadatas metadatas, lang, Project project) {
|
|
97 | 97 |
this.trsfiles = trsfiles; |
98 | 98 |
this.txmDir = txmDir; |
99 | 99 |
this.binDir = binDir; |
100 | 100 |
this.metadatas = metadatas; |
101 | 101 |
this.lang = lang; |
102 |
this.corpusProject = corpusProject;
|
|
102 |
this.project = project;
|
|
103 | 103 |
} |
104 | 104 |
|
105 | 105 |
/** |
... | ... | |
247 | 247 |
} |
248 | 248 |
} |
249 | 249 |
|
250 |
String cleanDirectories = corpusProject.getCleanAfterBuild(); |
|
251 |
if ("true".equals(cleanDirectories)) { |
|
252 |
new File(corpusProject.getProjectDirectory(), "tokenized").deleteDir() |
|
253 |
new File(corpusProject.getProjectDirectory(), "src").deleteDir() |
|
254 |
new File(corpusProject.getProjectDirectory(), "split").deleteDir() |
|
250 |
if (project.getCleanAfterBuild()) { |
|
251 |
new File(project.getProjectDirectory(), "tokenized").deleteDir() |
|
252 |
new File(project.getProjectDirectory(), "src").deleteDir() |
|
253 |
new File(project.getProjectDirectory(), "split").deleteDir() |
|
255 | 254 |
} |
256 | 255 |
|
257 | 256 |
cpb.done() |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 2998) | ||
---|---|---|
67 | 67 |
String userDir = System.getProperty("user.home"); |
68 | 68 |
|
69 | 69 |
def MONITOR; |
70 |
Project corpusProject;
|
|
70 |
Project project;
|
|
71 | 71 |
|
72 |
try {corpusProject=projectBinding;MONITOR=monitor} catch (Exception)
|
|
72 |
try {project=projectBinding;MONITOR=monitor} catch (Exception)
|
|
73 | 73 |
{ } |
74 |
if (corpusProject == null) { println "no project set. Aborting"; return; }
|
|
74 |
if (project == null) { println "no project set. Aborting"; return; }
|
|
75 | 75 |
|
76 |
String corpusname = corpusProject.getName();
|
|
76 |
String corpusname = project.getName();
|
|
77 | 77 |
String basename = corpusname |
78 |
String rootDir = corpusProject.getSrcdir();
|
|
79 |
String lang = corpusProject.getLang()
|
|
78 |
String rootDir = project.getSrcdir();
|
|
79 |
String lang = project.getLang()
|
|
80 | 80 |
String model = lang |
81 |
String encoding = corpusProject.getEncoding()
|
|
82 |
boolean annotate = corpusProject.getAnnotate()
|
|
83 |
String xsl = corpusProject.getFrontXSL();
|
|
84 |
def xslParams = corpusProject.getXsltParameters();
|
|
85 |
int wordsPerPage = corpusProject.getEditionDefinition("default").getWordsPerPage()
|
|
86 |
String page_element = corpusProject.getEditionDefinition("default").getPageElement()
|
|
87 |
boolean build_edition = corpusProject.getEditionDefinition("default").getBuildEdition()
|
|
88 |
boolean update = corpusProject.getDoUpdate()
|
|
81 |
String encoding = project.getEncoding()
|
|
82 |
boolean annotate = project.getAnnotate()
|
|
83 |
String xsl = project.getFrontXSL();
|
|
84 |
def xslParams = project.getXsltParameters();
|
|
85 |
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage()
|
|
86 |
String page_element = project.getEditionDefinition("default").getPageElement()
|
|
87 |
boolean build_edition = project.getEditionDefinition("default").getBuildEdition()
|
|
88 |
boolean update = project.getDoUpdate()
|
|
89 | 89 |
|
90 | 90 |
File srcDir = new File(rootDir); |
91 |
File binDir = corpusProject.getProjectDirectory();
|
|
91 |
File binDir = project.getProjectDirectory();
|
|
92 | 92 |
binDir.mkdirs(); |
93 | 93 |
if (!binDir.exists()) { |
94 | 94 |
println "Could not create binDir "+binDir |
... | ... | |
195 | 195 |
if (MONITOR != null) MONITOR.worked(1, "IMPORTER") |
196 | 196 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
197 | 197 |
println "-- IMPORTER" |
198 |
def imp = new importer(trsfiles, binDir, txmDir, metadatas, lang, corpusProject) //put result in the txm folder of binDir
|
|
198 |
def imp = new importer(trsfiles, binDir, txmDir, metadatas, lang, project) //put result in the txm folder of binDir
|
|
199 | 199 |
if (!imp.run()) { |
200 | 200 |
println "Failed to prepare files - Aborting"; |
201 | 201 |
return; |
... | ... | |
243 | 243 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
244 | 244 |
annotationSuccess = true; |
245 | 245 |
|
246 |
String cleanDirectories = corpusProject.getCleanAfterBuild(); |
|
247 |
if ("true".equals(cleanDirectories)) { |
|
248 |
new File(corpusProject.getProjectDirectory(), "ptreetagger").deleteDir() |
|
249 |
new File(corpusProject.getProjectDirectory(), "treetagger").deleteDir() |
|
250 |
new File(corpusProject.getProjectDirectory(), "annotations").deleteDir() |
|
246 |
if (project.getCleanAfterBuild()) { |
|
247 |
new File(binDir, "treetagger").deleteDir() |
|
248 |
new File(binDir, "ptreetagger").deleteDir() |
|
249 |
new File(binDir, "annotations").deleteDir() |
|
251 | 250 |
} |
252 | 251 |
} |
253 | 252 |
} |
... | ... | |
284 | 283 |
if(debug) comp.setDebug(); |
285 | 284 |
comp.removeInterviewers(removeInterviewer); |
286 | 285 |
comp.setIgnoreTranscriberMetadata(ignoreTranscriberMetadata); |
287 |
if (!comp.run(corpusProject, xmltxmFiles, corpusname, "default", binDir)) {
|
|
286 |
if (!comp.run(project, xmltxmFiles, corpusname, "default", binDir)) {
|
|
288 | 287 |
println "Failed to compile files"; |
289 | 288 |
return; |
290 | 289 |
} |
... | ... | |
312 | 311 |
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang); |
313 | 312 |
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang); |
314 | 313 |
|
315 |
Text t = corpusProject.getText(txtname)
|
|
314 |
Text t = project.getText(txtname)
|
|
316 | 315 |
if (t == null) { |
317 |
t = new Text(corpusProject);
|
|
316 |
t = new Text(project);
|
|
318 | 317 |
t.setName(txtname); |
319 | 318 |
} |
320 | 319 |
t.setSourceFile(txmFile) |
... | ... | |
356 | 355 |
String txtname = txmFile.getName(); |
357 | 356 |
int i = txtname.lastIndexOf("."); |
358 | 357 |
if(i > 0) txtname = txtname.substring(0, i); |
359 |
File mediaFile = new File(corpusProject.getSrcdir(), txtname + ".mp3")
|
|
360 |
if (!mediaFile.exists()) mediaFile = new File(corpusProject.getSrcdir(), txtname + ".wav")
|
|
361 |
if (!mediaFile.exists()) mediaFile = new File(corpusProject.getSrcdir(), txtname + ".mp4")
|
|
362 |
if (!mediaFile.exists()) mediaFile = new File(corpusProject.getSrcdir(), txtname + ".avi")
|
|
358 |
File mediaFile = new File(project.getSrcdir(), txtname + ".mp3")
|
|
359 |
if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".wav")
|
|
360 |
if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".mp4")
|
|
361 |
if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".avi")
|
|
363 | 362 |
|
364 | 363 |
if (mediaFile.exists()) { |
365 | 364 |
File copy = new File(binDir, "media/"+mediaFile.getName()) |
... | ... | |
374 | 373 |
|
375 | 374 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
376 | 375 |
if (MONITOR != null) MONITOR.worked(20, "FINALIZING") |
377 |
readyToLoad = corpusProject.save(); |
|
376 |
readyToLoad = project.save(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/compiler.groovy (revision 2998) | ||
---|---|---|
117 | 117 |
* @param outdir the outdir |
118 | 118 |
* @return true, if successful |
119 | 119 |
*/ |
120 |
public boolean run(Project corpusProject, List<File> xmlfiles, String corpusname, String projectname, File binDir) {
|
|
120 |
public boolean run(Project project, List<File> xmlfiles, String corpusname, String projectname, File binDir) {
|
|
121 | 121 |
|
122 | 122 |
//println "run compiler with $xmlfiles, $basename and $outdir" |
123 | 123 |
this.outdir = binDir; |
... | ... | |
129 | 129 |
|
130 | 130 |
sectionAttrs = new HashSet<String>() // reset section attributs set |
131 | 131 |
|
132 |
CorpusBuild corpus = corpusProject.getCorpusBuild(corpusProject.getName(), MainCorpus.class);
|
|
132 |
CorpusBuild corpus = project.getCorpusBuild(project.getName(), MainCorpus.class);
|
|
133 | 133 |
if (corpus != null) { |
134 |
if (corpusProject.getDoUpdate()) {
|
|
134 |
if (project.getDoUpdate()) {
|
|
135 | 135 |
corpus.clean(); // remove old files |
136 | 136 |
} else { |
137 | 137 |
corpus.delete(); // remove old files and TXMResult children |
138 | 138 |
} |
139 | 139 |
} else { |
140 |
corpus = new MainCorpus(corpusProject);
|
|
141 |
corpus.setID(corpusProject.getName());
|
|
142 |
corpus.setName(corpusProject.getName());
|
|
140 |
corpus = new MainCorpus(project);
|
|
141 |
corpus.setID(project.getName());
|
|
142 |
corpus.setName(project.getName());
|
|
143 | 143 |
} |
144 | 144 |
corpus.setDescription("Built with the XML-TRS import module"); |
145 | 145 |
|
... | ... | |
236 | 236 |
|
237 | 237 |
} catch (Exception ex) {System.out.println(ex); return false;} |
238 | 238 |
|
239 |
String cleanDirectories = corpusProject.getCleanAfterBuild(); |
|
240 |
if ("true".equals(cleanDirectories)) { |
|
241 |
new File(corpusProject.getProjectDirectory(), "cqp").deleteDir() |
|
239 |
if (project.getCleanAfterBuild()) { |
|
240 |
new File(project.getProjectDirectory(), "cqp").deleteDir() |
|
242 | 241 |
} |
243 | 242 |
|
244 | 243 |
return true; |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/TRSToTEI.groovy (revision 2998) | ||
---|---|---|
206 | 206 |
writer.writeAttribute("time", formatedTime) |
207 | 207 |
|
208 | 208 |
writer.writeAttribute("start", time) |
209 |
writer.writeAttribute("end", parser.getAttributeValue(null, "endTime")) |
|
209 |
writer.writeAttribute("end", ""+parser.getAttributeValue(null, "endTime")) |
|
210 |
writer.writeAttribute("who", ""+parser.getAttributeValue(null, "speaker")) |
|
210 | 211 |
|
211 |
writer.writeAttribute("who", parser.getAttributeValue(null, "speaker")) |
|
212 |
|
|
213 | 212 |
for (int i = 0; i < parser.getAttributeCount(); i++) { // write other attributes if any |
214 | 213 |
String v = parser.getAttributeLocalName(i); |
215 | 214 |
if (!("who".equals(v)) && !("overlap".equals(v)) && !("time".equals(v)) && !("speaker".equals(v)) && !("endTime".equals(v)) && !("startTime".equals(v))) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/txtLoader.groovy (revision 2998) | ||
---|---|---|
98 | 98 |
println "-- IMPORTER - Reading source files with extension "+suffixes |
99 | 99 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
100 | 100 |
if (MONITOR != null) MONITOR.worked(20, "IMPORTER - Reading source files with extension "+suffixes) |
101 |
if (!new importer().run(srcDir, binDir, txmDir,encoding, suffixes, basename, lang)) { |
|
101 |
if (!new importer().run(srcDir, binDir, txmDir,encoding, suffixes, basename, lang, project)) {
|
|
102 | 102 |
println "Import process stopped"; |
103 | 103 |
return; |
104 | 104 |
} |
... | ... | |
136 | 136 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
137 | 137 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
138 | 138 |
annotationSuccess = true; |
139 |
if (project.getCleanAfterBuild()) { |
|
140 |
new File(binDir, "treetagger").deleteDir() |
|
141 |
new File(binDir, "ptreetagger").deleteDir() |
|
142 |
new File(binDir, "annotations").deleteDir() |
|
143 |
} |
|
139 | 144 |
} |
140 | 145 |
} |
141 | 146 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/compiler.groovy (revision 2998) | ||
---|---|---|
267 | 267 |
cwbMa.run(corpusname, outDir + "/registry"); |
268 | 268 |
} catch (Exception ex) {System.out.println("CWB error: "+ex); return false;} |
269 | 269 |
|
270 |
if (project.getCleanAfterBuild()) { |
|
271 |
new File(binDir, "cqp").deleteDir() |
|
272 |
} |
|
273 |
|
|
270 | 274 |
return true; |
271 | 275 |
} |
272 | 276 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/importer.groovy (revision 2998) | ||
---|---|---|
56 | 56 |
* @param basename the basename |
57 | 57 |
* @return true, if successful |
58 | 58 |
*/ |
59 |
public static boolean run(File srcDir, File binDir, File txmDir, String encoding, List<String> suffixes, String basename, String lang) { |
|
59 |
public static boolean run(File srcDir, File binDir, File txmDir, String encoding, List<String> suffixes, String basename, String lang, def project) {
|
|
60 | 60 |
|
61 | 61 |
File stokenizedDir = new File(binDir,"stokenized"); |
62 | 62 |
stokenizedDir.deleteDir(); |
... | ... | |
297 | 297 |
files = txmDir.listFiles() |
298 | 298 |
if (files == null || files.size() == 0) return false |
299 | 299 |
|
300 |
if (project.getCleanAfterBuild()) { |
|
301 |
new File(project.getProjectDirectory(), "tokenized").deleteDir() |
|
302 |
new File(project.getProjectDirectory(), "ptokenized").deleteDir() |
|
303 |
new File(project.getProjectDirectory(), "stokenized").deleteDir() |
|
304 |
new File(project.getProjectDirectory(), "src").deleteDir() |
|
305 |
new File(project.getProjectDirectory(), "split").deleteDir() |
|
306 |
} |
|
307 |
|
|
300 | 308 |
return true; |
301 | 309 |
} |
302 | 310 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/importer.groovy (revision 2998) | ||
---|---|---|
68 | 68 |
* @param basename the basename |
69 | 69 |
* @return true, if successful |
70 | 70 |
*/ |
71 |
public static boolean run(File rootDirFile, File binDir, File txmDir, String encoding, String basename, String lang) |
|
71 |
public static boolean run(File rootDirFile, File binDir, File txmDir, String encoding, String basename, String lang, def project)
|
|
72 | 72 |
{ |
73 | 73 |
if (rootDirFile.listFiles() == null || rootDirFile.listFiles().size() == 0) { |
74 | 74 |
println "Error: no file to process in "+rootDirFile; |
... | ... | |
184 | 184 |
} |
185 | 185 |
} |
186 | 186 |
cpb.done() |
187 |
|
|
188 |
if (project.getCleanAfterBuild()) { |
|
189 |
new File(binDir, "tokenized").deleteDir() |
|
190 |
new File(binDir, "stokenized").deleteDir() |
|
191 |
new File(binDir, "src").deleteDir() |
|
192 |
new File(binDir, "split").deleteDir() |
|
193 |
} |
|
187 | 194 |
return true; |
188 | 195 |
} |
189 | 196 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/alcesteLoader.groovy (revision 2998) | ||
---|---|---|
78 | 78 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
79 | 79 |
println "-- IMPORTER - Reading source files" |
80 | 80 |
|
81 |
if (!(new importer().run(srcDir, binDir, txmDir, encoding, basename, lang))) { |
|
81 |
if (!(new importer().run(srcDir, binDir, txmDir, encoding, basename, lang, project))) {
|
|
82 | 82 |
println "import process stopped"; |
83 | 83 |
return; |
84 | 84 |
} |
... | ... | |
94 | 94 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
95 | 95 |
annotationSuccess = true; |
96 | 96 |
} |
97 |
|
|
98 |
if (project.getCleanAfterBuild()) { |
|
99 |
new File(binDir, "treetagger").deleteDir() |
|
100 |
new File(binDir, "ptreetagger").deleteDir() |
|
101 |
new File(binDir, "annotations").deleteDir() |
|
102 |
} |
|
97 | 103 |
} |
98 | 104 |
|
99 | 105 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/compiler.groovy (revision 2998) | ||
---|---|---|
246 | 246 |
|
247 | 247 |
} catch (Exception ex) {System.out.println(ex); return false;} |
248 | 248 |
|
249 |
if (project.getCleanAfterBuild()) { |
|
250 |
new File(binDir, "cqp").deleteDir() |
|
251 |
} |
|
252 |
|
|
249 | 253 |
return true; |
250 | 254 |
} |
251 | 255 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/hyperbaseLoader.groovy (revision 2998) | ||
---|---|---|
91 | 91 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
92 | 92 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
93 | 93 |
annotationSuccess = true; |
94 |
if (project.getCleanAfterBuild()) { |
|
95 |
new File(binDir, "treetagger").deleteDir() |
|
96 |
new File(binDir, "ptreetagger").deleteDir() |
|
97 |
new File(binDir, "annotations").deleteDir() |
|
98 |
} |
|
94 | 99 |
} |
95 | 100 |
} |
96 | 101 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/compiler.groovy (revision 2998) | ||
---|---|---|
248 | 248 |
|
249 | 249 |
} catch (Exception ex) {System.out.println(ex); return false;} |
250 | 250 |
|
251 |
if (project.getCleanAfterBuild()) { |
|
252 |
new File(binDir, "cqp").deleteDir() |
|
253 |
} |
|
254 |
|
|
251 | 255 |
return true; |
252 | 256 |
} |
253 | 257 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/cqp/compiler.groovy (revision 2998) | ||
---|---|---|
186 | 186 |
cwbMa.run(corpusname, outDir + "/registry"); |
187 | 187 |
return true; |
188 | 188 |
} catch (Exception ex) {System.out.println(ex); return false;} |
189 |
|
|
189 |
|
|
190 |
if (project.getCleanAfterBuild()) { |
|
191 |
new File(binDir, "cqp").deleteDir() |
|
192 |
} |
|
193 |
|
|
190 | 194 |
return true; |
191 | 195 |
} |
192 | 196 |
|
Formats disponibles : Unified diff