118 |
118 |
}
|
119 |
119 |
|
120 |
120 |
final HashMap<String, String> textordersInfo = new HashMap<String, String>();
|
121 |
|
for (String t : metadatas.keySet()) {
|
122 |
|
def ti = metadatas.get(t)
|
123 |
|
for (org.txm.metadatas.Entry e : ti) {
|
124 |
|
if ("textorder".equals(e.getId())) {
|
125 |
|
textordersInfo[t+".trs"] = ti.value()
|
|
121 |
if (metadatas != null) {
|
|
122 |
for (String t : metadatas.keySet()) {
|
|
123 |
def ti = metadatas.get(t)
|
|
124 |
for (org.txm.metadatas.Entry e : ti) {
|
|
125 |
if ("textorder".equals(e.getId())) {
|
|
126 |
String k = ""+t+".xml" // the sort test will use the xml-txm file names
|
|
127 |
textordersInfo[k] = e.value
|
|
128 |
}
|
126 |
129 |
}
|
127 |
130 |
}
|
128 |
|
}
|
129 |
|
|
|
131 |
}
|
130 |
132 |
File propertyFile = new File(srcDir, "import.properties")//default
|
131 |
133 |
Properties props = new Properties();
|
132 |
134 |
String[] metadatasToKeep;
|
... | ... | |
134 |
136 |
FileInputStream input = new FileInputStream(propertyFile);
|
135 |
137 |
props.load(input);
|
136 |
138 |
input.close();
|
137 |
|
|
|
139 |
|
138 |
140 |
if (props.getProperty("removeInterviewer") != null)
|
139 |
141 |
removeInterviewer = Boolean.parseBoolean(props.get("removeInterviewer").toString());
|
140 |
142 |
if (props.getProperty("ignoreTranscriberMetadata") != null)
|
... | ... | |
145 |
147 |
csvHeaderNumber = props.get("csvHeaderNumber").toString().split("|");
|
146 |
148 |
//if (props.getProperty("includeComments") != null)
|
147 |
149 |
// includeComments = props.get("includeComments").toString();
|
148 |
|
|
|
150 |
|
149 |
151 |
println "import properties: "
|
150 |
152 |
println " removeInterviewer: "+removeInterviewer
|
151 |
153 |
println " ignoreTranscriberMetadata: "+ignoreTranscriberMetadata
|
... | ... | |
168 |
170 |
srcDir = new File(binDir, "src");
|
169 |
171 |
println ""
|
170 |
172 |
}
|
171 |
|
|
|
173 |
|
172 |
174 |
// select only trs files
|
173 |
175 |
String ext = "trs";
|
174 |
176 |
ArrayList<File> trsfiles = srcDir.listFiles(); //find all trs files
|
... | ... | |
183 |
185 |
i--;
|
184 |
186 |
}
|
185 |
187 |
}
|
186 |
|
|
|
188 |
|
187 |
189 |
if (trsfiles.size() == 0) {
|
188 |
190 |
println ("No transcription file (*.trs) found in "+srcDir.getAbsolutePath()+". Aborting.")
|
189 |
191 |
return false;
|
190 |
192 |
}
|
191 |
|
|
|
193 |
|
192 |
194 |
if (MONITOR != null) MONITOR.worked(1, "IMPORTER")
|
193 |
195 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
194 |
196 |
println "-- IMPORTER"
|
... | ... | |
198 |
200 |
return;
|
199 |
201 |
}
|
200 |
202 |
if (MONITOR != null) MONITOR.worked(20)
|
201 |
|
|
|
203 |
|
202 |
204 |
println "-- Xml Validation"
|
203 |
205 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
204 |
206 |
for (File infile : txmDir.listFiles()) {
|
... | ... | |
207 |
209 |
infile.delete();
|
208 |
210 |
}
|
209 |
211 |
}
|
210 |
|
|
|
212 |
|
211 |
213 |
if (MONITOR != null) MONITOR.worked(5)
|
212 |
214 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
213 |
215 |
println "-- Remove interviewer: "+removeInterviewer
|
... | ... | |
221 |
223 |
int idx = filename.indexOf(".xml");
|
222 |
224 |
if (idx > 0)
|
223 |
225 |
filename = filename.substring(0, idx);
|
224 |
|
|
|
226 |
|
225 |
227 |
ArrayList<Pair<String, String>> metas = metadatas.get(filename)
|
226 |
228 |
//println "filename=$filename metas= $metas"
|
227 |
229 |
for (Pair p : metas) {
|
... | ... | |
232 |
234 |
}
|
233 |
235 |
}
|
234 |
236 |
}
|
235 |
|
|
|
237 |
|
236 |
238 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
237 |
239 |
if (MONITOR != null) MONITOR.worked(20, "ANNOTATE")
|
238 |
|
|
|
240 |
|
239 |
241 |
boolean annotationSuccess = false;
|
240 |
242 |
if (annotate) {
|
241 |
243 |
println "-- ANNOTATE - Running NLP tools"
|
... | ... | |
244 |
246 |
annotationSuccess = true;
|
245 |
247 |
}
|
246 |
248 |
}
|
247 |
|
}
|
248 |
|
trsfiles = new ArrayList<File>(Arrays.asList(txmDir.listFiles()));
|
|
249 |
} // end of importer and annotate steps
|
249 |
250 |
|
|
251 |
xmltxmFiles = new ArrayList<File>(Arrays.asList(txmDir.listFiles()));
|
250 |
252 |
if (metadatas != null && metadatas.getPropertyNames().contains("textorder")) {
|
251 |
|
Collections.sort(trsfiles, new Comparator<File>() {
|
|
253 |
Collections.sort(xmltxmFiles, new Comparator<File>() {
|
252 |
254 |
public int compare(File f1, File f2) {
|
253 |
|
String o1 = textorder[f1.getName()];
|
254 |
|
String o2 = textorder[f2.getName()];
|
|
255 |
String o1 = textordersInfo[f1.getName()];
|
|
256 |
String o2 = textordersInfo[f2.getName()];
|
255 |
257 |
if (o1 == null && o2 == null) {
|
256 |
258 |
return f1.compareTo(f2);
|
257 |
259 |
} else if (o1 == null) {
|
... | ... | |
266 |
268 |
}
|
267 |
269 |
});
|
268 |
270 |
} else {
|
269 |
|
Collections.sort(trsfiles);
|
|
271 |
Collections.sort(xmltxmFiles);
|
270 |
272 |
}
|
271 |
|
|
|
273 |
|
272 |
274 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
273 |
275 |
if (MONITOR != null) MONITOR.worked(25, "COMPILING")
|
274 |
276 |
println "--COMPILING - Building Search Engine indexes"
|
275 |
|
|
|
277 |
|
276 |
278 |
def comp = new compiler()
|
277 |
279 |
if(debug) comp.setDebug();
|
278 |
280 |
comp.removeInterviewers(removeInterviewer);
|
279 |
281 |
comp.setIgnoreTranscriberMetadata(ignoreTranscriberMetadata);
|
280 |
|
if (!comp.run(project, trsfiles, corpusname, "default", binDir)) {
|
|
282 |
if (!comp.run(project, xmltxmFiles, corpusname, "default", binDir)) {
|
281 |
283 |
println "Failed to compile files";
|
282 |
284 |
return;
|
283 |
285 |
}
|
284 |
|
|
|
286 |
|
285 |
287 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
|
286 |
|
|
|
288 |
|
287 |
289 |
File htmlDir = new File(binDir,"HTML/$corpusname");
|
288 |
290 |
htmlDir.deleteDir()
|
289 |
291 |
htmlDir.mkdirs();
|
290 |
292 |
if (build_edition) {
|
291 |
|
|
|
293 |
|
292 |
294 |
if (MONITOR != null) MONITOR.worked(20, "EDITION")
|
293 |
295 |
println "-- EDITION - Building editions"
|
294 |
|
|
|
296 |
|
295 |
297 |
def second = 0
|
296 |
|
|
297 |
|
println "Paginating "+trsfiles.size()+" texts"
|
298 |
|
ConsoleProgressBar cpb = new ConsoleProgressBar(trsfiles.size());
|
299 |
|
for (File txmFile : trsfiles) {
|
|
298 |
|
|
299 |
println "Paginating "+xmltxmFiles.size()+" texts"
|
|
300 |
ConsoleProgressBar cpb = new ConsoleProgressBar(xmltxmFiles.size());
|
|
301 |
for (File txmFile : xmltxmFiles) {
|
300 |
302 |
cpb.tick()
|
301 |
303 |
String txtname = txmFile.getName();
|
302 |
304 |
int i = txtname.lastIndexOf(".");
|
303 |
305 |
if(i > 0) txtname = txtname.substring(0, i);
|
304 |
|
|
|
306 |
|
305 |
307 |
List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
|
306 |
308 |
List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
|
307 |
|
|
|
309 |
|
308 |
310 |
Text t = project.getText(txtname)
|
309 |
311 |
if (t == null) {
|
310 |
|
new Text(project);
|
|
312 |
t = new Text(project);
|
311 |
313 |
t.setName(txtname);
|
312 |
|
t.setSourceFile(txmFile)
|
313 |
|
t.setTXMFile(txmFile)
|
|
314 |
}
|
|
315 |
t.setSourceFile(txmFile)
|
|
316 |
t.setTXMFile(txmFile)
|
|
317 |
|
|
318 |
Edition edition = t.getEdition("default")
|
|
319 |
if (edition != null) {
|
|
320 |
edition.delete();
|
|
321 |
edition = null;
|
314 |
322 |
}
|
315 |
|
def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
|
316 |
|
Edition edition = new Edition(t);
|
|
323 |
def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
|
|
324 |
edition = t.getEdition("default")
|
|
325 |
edition = new Edition(t);
|
317 |
326 |
edition.setName("default");
|
|
327 |
|
318 |
328 |
edition.setIndex(htmlDir.getAbsolutePath());
|
319 |
329 |
for (i = 0 ; i < ed.getPageFiles().size();) {
|
320 |
330 |
File f = ed.getPageFiles().get(i);
|
... | ... | |
324 |
334 |
}
|
325 |
335 |
}
|
326 |
336 |
cpb.done()
|
327 |
|
|
|
337 |
|
328 |
338 |
//copy transcriber.css
|
329 |
339 |
File cssfile = new File(Toolbox.getTxmHomePath(), "css/transcriber.css")
|
330 |
340 |
File cssTXMFile = new File(Toolbox.getTxmHomePath(), "css/txm.css")
|
... | ... | |
333 |
343 |
FileCopy.copy(cssfile, new File(htmlDir, "default/txm.css"));
|
334 |
344 |
FileCopy.copy(cssfile, new File(htmlDir, "default/transcriber.css"));
|
335 |
345 |
}
|
336 |
|
|
|
346 |
|
337 |
347 |
//copy media files
|
338 |
|
println "Copying media files if any (mp3, wav, mp4 or avi) "+trsfiles.size()+" texts"
|
339 |
|
cpb = new ConsoleProgressBar(trsfiles.size());
|
340 |
|
for (File txmFile : trsfiles) {
|
|
348 |
println "Copying media files if any (mp3, wav, mp4 or avi) "+xmltxmFiles.size()+" texts"
|
|
349 |
cpb = new ConsoleProgressBar(xmltxmFiles.size());
|
|
350 |
for (File txmFile : xmltxmFiles) {
|
341 |
351 |
cpb.tick()
|
342 |
352 |
String txtname = txmFile.getName();
|
343 |
353 |
int i = txtname.lastIndexOf(".");
|
... | ... | |
346 |
356 |
if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".wav")
|
347 |
357 |
if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".mp4")
|
348 |
358 |
if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".avi")
|
349 |
|
|
|
359 |
|
350 |
360 |
if (mediaFile.exists()) {
|
351 |
361 |
File copy = new File(binDir, "media/"+mediaFile.getName())
|
352 |
362 |
copy.getParentFile().mkdirs()
|