root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / xtz / XTZImport.groovy @ 2126
History | View | Annotate | Download (3.4 kB)
1 |
package org.txm.scripts.importer.xtz;
|
---|---|
2 |
|
3 |
import java.io.File; |
4 |
import java.io.FileFilter; |
5 |
import java.util.ArrayList; |
6 |
import java.util.Arrays; |
7 |
import java.util.Collections; |
8 |
|
9 |
import org.eclipse.core.runtime.IProgressMonitor; |
10 |
import org.txm.utils.xml.DomUtils; |
11 |
import org.txm.metadatas.Metadatas |
12 |
import org.txm.utils.io.FileCopy; |
13 |
import org.txm.* |
14 |
import org.txm.objects.* |
15 |
import org.w3c.dom.Element |
16 |
import org.txm.importer.xtz.* |
17 |
|
18 |
/**
 * Import module of the XTZ import.
 *
 * Wires the XTZ importer, compiler, annotater and pager steps, orders the
 * TXM files using the 'textorder' text metadata when available, and copies
 * the "xsl", "css" and "dtd" source subdirectories to the binary directory
 * after a successful import.
 */
public class XTZImport extends ImportModule {

	/**
	 * @param importParametersFile the import parameters file, forwarded to the super constructor
	 */
	public XTZImport(File importParametersFile) {
		super(importParametersFile);
	}

	/**
	 * @param p the project, forwarded to the super constructor
	 */
	public XTZImport(Project p) {
		super(p);
	}

	/**
	 * Initializes the module then sets the four XTZ steps.
	 *
	 * @param p the project, forwarded to super.init
	 */
	@Override
	public void init(Project p) {
		super.init(p);

		importer = new XTZImporter(this)
		compiler = new XTZCompiler(this)
		annotater = new TTAnnotater(this);
		pager = new XTZPager(this)
	}

	/**
	 * Lists the "*.xml" files of the "txm/&lt;corpusName&gt;" binary directory,
	 * sorted with their 'textorder' metadata value.
	 *
	 * Files with a 'textorder' value come first, sorted by that value; ties and
	 * files without value are sorted by file name. The default order of the
	 * super class is used when there is no importer step, no metadata, or when
	 * the TXM directory cannot be listed.
	 *
	 * @return the ordered TXM files
	 */
	@Override
	protected ArrayList<File> getTXMFilesOrder() {
		if (importer == null) {
			println "no importer step, using default text order"
			return super.getTXMFilesOrder();
		}
		Metadatas metadata = importer.getMetadata();
		if (metadata == null) {
			println "no metadata, using default text order"
			return super.getTXMFilesOrder();
		}

		File txmDirectory = new File(binaryDirectory, "txm/"+corpusName);
		File[] xmlFiles = txmDirectory.listFiles(new FileFilter() {
			@Override
			public boolean accept(File file) {
				return file.isFile() && file.getName().endsWith(".xml");
			}
		});
		// File.listFiles returns null if the directory does not exist or cannot be read
		if (xmlFiles == null) {
			println "cannot list the TXM files of "+txmDirectory+", using default text order"
			return super.getTXMFilesOrder();
		}
		ArrayList<File> files = new ArrayList<File>(Arrays.asList(xmlFiles));

		// only the files declaring a 'textorder' metadata get an entry
		final HashMap<File, String> textorder = new HashMap<File, String>();
		for (File f : files) {
			HashMap<String, String> m = metadata.getTextMetadata(f);
			if (m != null && m.containsKey("textorder")) textorder[f] = m["textorder"];
		}
		println "Sorting texts using 'textorder' metadata values: "+textorder

		// NOTE(review): values are compared as Strings ("10" sorts before "2") -- confirm this is the intended order
		Collections.sort(files, new Comparator<File>() {
			public int compare(File f1, File f2) {
				String o1 = textorder[f1];
				String o2 = textorder[f2];
				if (o1 == null && o2 == null) {
					return f1.getName().compareTo(f2.getName());
				} else if (o1 == null) {
					return 1 // files without 'textorder' go last
				} else if (o2 == null) {
					return -1
				} else {
					int c = o1.compareTo(o2);
					if (c == 0) return f1.getName().compareTo(f2.getName());
					else return c;
				}
			}
		});
		return files;
	}

	/**
	 * Runs the import then, if it succeeded and the corpus is not being
	 * updated, copies the "xsl", "css" and "dtd" subdirectories of the source
	 * directory to the binary directory. An existing target subdirectory is
	 * deleted before the copy.
	 *
	 * @throws InterruptedException declared by the import process
	 */
	@Override
	public void start() throws InterruptedException {
		super.start();

		if (!isSuccessful) {
			return;
		}

		//declare a local KR
		//TODO find out how the annotation plugin may hook the import steps
		//			List<String> krnames = importParameters.getKnowledgeRepositoryNames();
		//			if (krnames.size() == 0) {
		//				importParameters.createKnowledgeRepositoryElement("DEFAULT"); // set a default KR shared by all XTZ corpus
		//			} else if (krnames.size() == 1 && krnames.get(0).equals("DEFAULT")) {
		//				// nothing to do
		//			} else {
		//				println("Corpus is using custom Knowledge repositories: "+importParameters.getKnowledgeRepositoryNames());
		//			}

		// the subdirectories are only copied when the corpus is not being updated
		if (isUpdatingCorpus()) {
			return;
		}

		//copy sub directories
		def dirToCopy = ["xsl", "css", "dtd"]
		println "--- Copying subdirectories $dirToCopy"
		for (String dir : dirToCopy) {
			File origDirectory = new File(this.sourceDirectory, dir)
			if (origDirectory.exists()) {
				print "."
				File copyDirectory = new File(this.binaryDirectory, dir)
				copyDirectory.deleteDir(); // clean before copying
				FileCopy.copyFiles(origDirectory, copyDirectory)
			}
		}
		println ""
	}
}