 }
 
 final HashMap<String, String> textordersInfo = new HashMap<String, String>();
-for (String t : metadatas.keySet()) {
-	def ti = metadatas.get(t)
-	for (org.txm.metadatas.Entry e : ti) {
-		if ("textorder".equals(e.getId())) {
-			textordersInfo[t+".trs"] = ti.value()
+if (metadatas != null) {
+	for (String t : metadatas.keySet()) {
+		def ti = metadatas.get(t)
+		for (org.txm.metadatas.Entry e : ti) {
+			if ("textorder".equals(e.getId())) {
+				String k = ""+t+".xml" // the sort test will use the xml-txm file names
+				textordersInfo[k] = e.value
+			}
 		}
 	}
-}
-
+}
 File propertyFile = new File(srcDir, "import.properties")//default
 Properties props = new Properties();
 String[] metadatasToKeep;
...
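
The new block above changes the map key from "<text>.trs" to "<text>.xml": the sort performed later compares the names of the XML-TXM files found in txmDir, so the keys must match those names. A minimal, self-contained sketch of the same idea using plain Groovy maps instead of the org.txm.metadatas API (the sample data and names below are made up for illustration):

    def textordersInfo = [:]
    def sampleMetadata = [                                   // hypothetical per-text metadata entries
        interview1: [[id: "textorder", value: "2"]],
        interview2: [[id: "textorder", value: "1"]]
    ]
    sampleMetadata.each { textName, entries ->
        def e = entries.find { it.id == "textorder" }
        if (e != null) textordersInfo[textName + ".xml"] = e.value   // key = XML-TXM file name
    }
    assert textordersInfo["interview2.xml"] == "1"
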
 FileInputStream input = new FileInputStream(propertyFile);
 props.load(input);
 input.close();
 
 if (props.getProperty("removeInterviewer") != null)
 	removeInterviewer = Boolean.parseBoolean(props.get("removeInterviewer").toString());
 if (props.getProperty("ignoreTranscriberMetadata") != null)
...
 	csvHeaderNumber = props.get("csvHeaderNumber").toString().split("|");
 //if (props.getProperty("includeComments") != null)
 //	includeComments = props.get("includeComments").toString();
 
 println "import properties: "
 println " removeInterviewer: "+removeInterviewer
 println " ignoreTranscriberMetadata: "+ignoreTranscriberMetadata
...
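
The import.properties file drives the optional behaviours of the script. A short sketch of how such a file is read with java.util.Properties, assuming keys like removeInterviewer and ignoreTranscriberMetadata as used above (the file path and values are only examples):

    def propertyFile = new File(System.getProperty("java.io.tmpdir"), "import.properties")  // example path
    propertyFile.text = "removeInterviewer = true\nignoreTranscriberMetadata = false\n"
    def props = new Properties()
    propertyFile.withInputStream { props.load(it) }          // load and auto-close the stream
    boolean removeInterviewer = false
    boolean ignoreTranscriberMetadata = true
    if (props.getProperty("removeInterviewer") != null)
        removeInterviewer = Boolean.parseBoolean(props.getProperty("removeInterviewer"))
    if (props.getProperty("ignoreTranscriberMetadata") != null)
        ignoreTranscriberMetadata = Boolean.parseBoolean(props.getProperty("ignoreTranscriberMetadata"))
    assert removeInterviewer && !ignoreTranscriberMetadata
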
 	srcDir = new File(binDir, "src");
 	println ""
 }
 
 // select only trs files
 String ext = "trs";
 ArrayList<File> trsfiles = srcDir.listFiles(); //find all trs files
...
 		i--;
 	}
 }
 
 if (trsfiles.size() == 0) {
 	println ("No transcription file (*.trs) found in "+srcDir.getAbsolutePath()+". Aborting.")
 	return false;
 }
 
 if (MONITOR != null) MONITOR.worked(1, "IMPORTER")
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
 println "-- IMPORTER"
...
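
srcDir.listFiles() returns every file in src/, and the (elided) loop above then drops the entries that do not end in .trs. The same selection can be expressed with a FilenameFilter; a sketch with hypothetical files:

    def dir = new File(System.getProperty("java.io.tmpdir"), "trs-demo")   // example directory
    dir.mkdirs()
    new File(dir, "interview1.trs").text = ""
    new File(dir, "notes.txt").text = ""
    ArrayList<File> trsOnly = new ArrayList<File>(Arrays.asList(
        dir.listFiles({ File d, String name -> name.toLowerCase().endsWith(".trs") } as FilenameFilter)))
    assert trsOnly*.name == ["interview1.trs"]
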
 	return;
 }
 if (MONITOR != null) MONITOR.worked(20)
 
 println "-- Xml Validation"
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
 for (File infile : txmDir.listFiles()) {
...
 		infile.delete();
 	}
 }
 
 if (MONITOR != null) MONITOR.worked(5)
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
 println "-- Remove interviewer: "+removeInterviewer
...
 	int idx = filename.indexOf(".xml");
 	if (idx > 0)
 		filename = filename.substring(0, idx);
 
 	ArrayList<Pair<String, String>> metas = metadatas.get(filename)
 	//println "filename=$filename metas= $metas"
 	for (Pair p : metas) {
...
 		}
 	}
 }
 
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
 if (MONITOR != null) MONITOR.worked(20, "ANNOTATE")
 
 boolean annotationSuccess = false;
 if (annotate) {
 	println "-- ANNOTATE - Running NLP tools"
...
 			annotationSuccess = true;
 		}
 	}
-}
-trsfiles = new ArrayList<File>(Arrays.asList(txmDir.listFiles()));
+} // end of importer and annotate steps
 
+xmltxmFiles = new ArrayList<File>(Arrays.asList(txmDir.listFiles()));
 if (metadatas != null && metadatas.getPropertyNames().contains("textorder")) {
-	Collections.sort(trsfiles, new Comparator<File>() {
+	Collections.sort(xmltxmFiles, new Comparator<File>() {
 		public int compare(File f1, File f2) {
-			String o1 = textorder[f1.getName()];
-			String o2 = textorder[f2.getName()];
+			String o1 = textordersInfo[f1.getName()];
+			String o2 = textordersInfo[f2.getName()];
 			if (o1 == null && o2 == null) {
 				return f1.compareTo(f2);
 			} else if (o1 == null) {
...
 		}
 	});
 } else {
-	Collections.sort(trsfiles);
+	Collections.sort(xmltxmFiles);
 }
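
The comparator above orders the XML-TXM files by the textorder values collected earlier and falls back to name order when neither file has one. The branches hidden by the ellipsis are not shown in this changeset, so in the sketch below the handling of a single missing value and the final comparison are assumptions (files without a textorder pushed to the end, plain string comparison of the order values); the data is made up:

    def textordersInfo = ["b.xml": "1", "a.xml": "2"]         // made-up order values
    def files = [new File("a.xml"), new File("c.xml"), new File("b.xml")]
    Collections.sort(files, new Comparator<File>() {
        public int compare(File f1, File f2) {
            String o1 = textordersInfo[f1.getName()]
            String o2 = textordersInfo[f2.getName()]
            if (o1 == null && o2 == null) return f1.compareTo(f2)
            else if (o1 == null) return 1                      // assumption: no textorder -> sort last
            else if (o2 == null) return -1
            return o1.compareTo(o2)                            // assumption: lexicographic order values
        }
    })
    assert files*.name == ["b.xml", "a.xml", "c.xml"]
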
 
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
 if (MONITOR != null) MONITOR.worked(25, "COMPILING")
 println "--COMPILING - Building Search Engine indexes"
 
 def comp = new compiler()
 if(debug) comp.setDebug();
 comp.removeInterviewers(removeInterviewer);
 comp.setIgnoreTranscriberMetadata(ignoreTranscriberMetadata);
-if (!comp.run(project, trsfiles, corpusname, "default", binDir)) {
+if (!comp.run(project, xmltxmFiles, corpusname, "default", binDir)) {
 	println "Failed to compile files";
 	return;
 }
 
 if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
 
 File htmlDir = new File(binDir,"HTML/$corpusname");
 htmlDir.deleteDir()
 htmlDir.mkdirs();
 if (build_edition) {
 
 	if (MONITOR != null) MONITOR.worked(20, "EDITION")
 	println "-- EDITION - Building editions"
 
 	def second = 0
-
-	println "Paginating "+trsfiles.size()+" texts"
-	ConsoleProgressBar cpb = new ConsoleProgressBar(trsfiles.size());
-	for (File txmFile : trsfiles) {
+
+	println "Paginating "+xmltxmFiles.size()+" texts"
+	ConsoleProgressBar cpb = new ConsoleProgressBar(xmltxmFiles.size());
+	for (File txmFile : xmltxmFiles) {
 		cpb.tick()
 		String txtname = txmFile.getName();
 		int i = txtname.lastIndexOf(".");
 		if(i > 0) txtname = txtname.substring(0, i);
 
 		List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
 		List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
 
 		Text t = project.getText(txtname)
 		if (t == null) {
-			new Text(project);
+			t = new Text(project);
 			t.setName(txtname);
-			t.setSourceFile(txmFile)
-			t.setTXMFile(txmFile)
+		}
+		t.setSourceFile(txmFile)
+		t.setTXMFile(txmFile)
+
+		Edition edition = t.getEdition("default")
+		if (edition != null) {
+			edition.delete();
+			edition = null;
 		}
-		def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
-		Edition edition = new Edition(t);
+		def ed = new pager(txmFile, htmlDir, txtname, NoSpaceBefore, NoSpaceAfter, wordsPerPage, basename, page_element, metadatas);
+		edition = t.getEdition("default")
+		edition = new Edition(t);
 		edition.setName("default");
+
 		edition.setIndex(htmlDir.getAbsolutePath());
 		for (i = 0 ; i < ed.getPageFiles().size();) {
 			File f = ed.getPageFiles().get(i);
...
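
Together with deleting any previous "default" edition of each Text, the hunk above clears the HTML output directory before paginating again, so a re-import never mixes old and new pages. The directory handling relies on Groovy's File.deleteDir(); a standalone sketch with example paths:

    def binDir = new File(System.getProperty("java.io.tmpdir"), "txm-demo-bin")   // example binDir
    def corpusname = "MYCORPUS"                                                   // example corpus name
    File htmlDir = new File(binDir, "HTML/$corpusname")
    new File(htmlDir, "default").mkdirs()
    new File(htmlDir, "default/old_page.html").text = "stale"                     // leftover from a previous run
    htmlDir.deleteDir()                 // recursively removes the previous edition, if any
    htmlDir.mkdirs()                    // start again from an empty directory
    assert htmlDir.exists() && htmlDir.listFiles().length == 0
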
 		}
 	}
 	cpb.done()
 
 	//copy transcriber.css
 	File cssfile = new File(Toolbox.getTxmHomePath(), "css/transcriber.css")
 	File cssTXMFile = new File(Toolbox.getTxmHomePath(), "css/txm.css")
...
 		FileCopy.copy(cssfile, new File(htmlDir, "default/txm.css"));
 		FileCopy.copy(cssfile, new File(htmlDir, "default/transcriber.css"));
 	}
 
 	//copy media files
-	println "Copying media files if any (mp3, wav, mp4 or avi) "+trsfiles.size()+" texts"
-	cpb = new ConsoleProgressBar(trsfiles.size());
-	for (File txmFile : trsfiles) {
+	println "Copying media files if any (mp3, wav, mp4 or avi) "+xmltxmFiles.size()+" texts"
+	cpb = new ConsoleProgressBar(xmltxmFiles.size());
+	for (File txmFile : xmltxmFiles) {
 		cpb.tick()
 		String txtname = txmFile.getName();
 		int i = txtname.lastIndexOf(".");
...
 		if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".wav")
 		if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".mp4")
 		if (!mediaFile.exists()) mediaFile = new File(project.getSrcdir(), txtname + ".avi")
 
 		if (mediaFile.exists()) {
 			File copy = new File(binDir, "media/"+mediaFile.getName())
 			copy.getParentFile().mkdirs()
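
The lookup above probes a fixed list of media extensions next to the sources (the earlier println mentions mp3, wav, mp4 and avi) and, when one of them exists, copies the file into binDir/media. The same probing written as a loop; the directories, text name and fake media file below are only examples:

    def srcdir = new File(System.getProperty("java.io.tmpdir"), "txm-demo-src")   // example source dir
    srcdir.mkdirs()
    new File(srcdir, "interview1.mp4").text = ""                                  // pretend media file
    String txtname = "interview1"
    def candidates = ["mp3", "wav", "mp4", "avi"].collect { new File(srcdir, txtname + "." + it) }
    File mediaFile = candidates.find { it.exists() }
    if (mediaFile != null) {
        File binDir = new File(System.getProperty("java.io.tmpdir"), "txm-demo-bin")
        File copy = new File(binDir, "media/" + mediaFile.getName())
        copy.getParentFile().mkdirs()                                             // create binDir/media before copying
        assert copy.getParentFile().exists()
    }
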