Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / xtz / XTZImport.groovy @ 479

History | View | Annotate | Download (5.3 kB)

1
package org.txm.importer.xtz;
2

    
3
import java.io.File;
4
import java.io.FileFilter;
5
import java.util.ArrayList;
6
import java.util.Arrays;
7
import java.util.Collections;
8

    
9
import org.txm.functions.ProgressWatcher;
10
import org.txm.utils.xml.DomUtils;
11
import org.txm.metadatas.Metadatas
12
import org.txm.objects.BaseParameters
13
import org.txm.utils.io.FileCopy;
14
import org.txm.*
15
import org.w3c.dom.Element
16

    
17
/**
 * Import module for the XTZ format.
 *
 * On initialisation it wires the four processing steps of the parent
 * {@code ImportModule}: {@code XTZImporter}, {@code XTZCompiler},
 * {@code TTAnnotater} and {@code XTZPager}. It also orders the TXM files
 * using the optional "textorder" text metadata, and after a successful
 * import it declares a default knowledge repository and copies the
 * "xsl", "css" and "dtd" sub-directories of the source directory.
 */
public class XTZImport extends ImportModule {

    /**
     * Builds the module from an import parameters file.
     *
     * @param importParametersFile the import parameters file, passed to the parent constructor
     */
    public XTZImport(File importParametersFile) {
        super(importParametersFile);
    }

    /**
     * Builds the module from already loaded import parameters.
     *
     * @param importParameters the import parameters, passed to the parent constructor
     */
    public XTZImport(BaseParameters importParameters) {
        super(importParameters);
    }

    /**
     * Runs the parent initialisation, then installs the XTZ specific steps.
     *
     * @param p the import parameters, forwarded to the parent
     */
    @Override
    public void init(BaseParameters p) {
        super.init(p);

        importer = new XTZImporter(this)
        compiler = new XTZCompiler(this)
        annotater = new TTAnnotater(this);
        pager = new XTZPager(this)
    }

    /**
     * Returns the ".xml" files of {@code <binaryDirectory>/txm/<corpusName>}
     * sorted by their "textorder" metadata value.
     *
     * Files that have a "textorder" value come first, ordered by that value;
     * files without one come last. Ties are broken by file name.
     * The parent's default order is used when there is no importer step,
     * no metadata, or when the TXM directory cannot be listed.
     *
     * @return the ordered list of TXM files
     */
    @Override
    protected ArrayList<File> getTXMFilesOrder() {
        if (importer == null) {
            println "no importer step, using default text order"
            return super.getTXMFilesOrder();
        }
        Metadatas metadata = importer.getMetadata();
        if (metadata == null) {
            println "no metadata, using default text order"
            return super.getTXMFilesOrder();
        }

        File txmDirectory = new File(binaryDirectory, "txm/"+corpusName);
        File[] xmlFiles = txmDirectory.listFiles(new FileFilter() {
            @Override
            public boolean accept(File file) {
                return file.isFile() && file.getName().endsWith(".xml");
            }
        });
        // File.listFiles returns null (not an empty array) when the path is not
        // a directory or an I/O error occurs; Arrays.asList(null) would then throw.
        if (xmlFiles == null) {
            println "cannot list TXM directory "+txmDirectory+", using default text order"
            return super.getTXMFilesOrder();
        }
        ArrayList<File> files = new ArrayList<File>(Arrays.asList(xmlFiles));

        // collect the 'textorder' value of each file that declares one
        final HashMap<File, String> textorder = new HashMap<File, String>();
        for (File f : files) {
            HashMap<String, String> m = metadata.getTextMetadata(f);
            if (m != null && m.containsKey("textorder")) {
                textorder.put(f, m.get("textorder"));
            }
        }
        println "Sorting texts using 'textorder' metadata values: "+textorder

        // NOTE(review): 'textorder' values are compared as strings, so "10" sorts
        // before "2" - confirm this is the intended ordering.
        Collections.sort(files, new Comparator<File>() {
            @Override
            public int compare(File f1, File f2) {
                String o1 = textorder.get(f1);
                String o2 = textorder.get(f2);
                if (o1 != null && o2 != null) {
                    int c = o1.compareTo(o2);
                    if (c != 0) return c;
                } else if (o1 != null) {
                    return -1 // only f1 has an order: it goes first
                } else if (o2 != null) {
                    return 1 // only f2 has an order: it goes first
                }
                // same order value, or no order value at all: use the file name
                return f1.getName().compareTo(f2.getName());
            }
        });
        return files;
    }

    /**
     * Runs the parent import and, if it succeeded, finalises the corpus:
     * declares the "DEFAULT" knowledge repository when none is declared, and,
     * unless an existing corpus is being updated, copies the "xsl", "css" and
     * "dtd" sub-directories from the source directory to the binary directory.
     *
     * @throws InterruptedException declared by this method; presumably raised by the parent import
     */
    public void start() throws InterruptedException {
        super.start();

        if (!isSuccessful) {
            return;
        }

        // declare a local KR
        List<String> krnames = importParameters.getKnowledgeRepositoryNames();
        if (krnames.size() == 0) {
            importParameters.createKnowledgeRepositoryElement("DEFAULT"); // set a default KR shared by all XTZ corpus
        } else if (!(krnames.size() == 1 && krnames.get(0).equals("DEFAULT"))) {
            println("Corpus is using custom Knowledge repositories: "+krnames);
        }
        // otherwise only "DEFAULT" is declared: nothing to do

        // copy sub directories, only for a new import
        if (!isUpdatingCorpus()) {
            def dirToCopy = ["xsl", "css", "dtd"]
            println "--- Copying subdirectories $dirToCopy"
            for (String dir : dirToCopy) {
                File origDirectory = new File(this.sourceDirectory, dir)
                if (origDirectory.exists()) {
                    print "."
                    File copyDirectory = new File(this.binaryDirectory, dir)
                    FileCopy.copyFiles(origDirectory, copyDirectory)
                }
            }
            println ""
        }
    }

    /**
     * Developer test entry point: configures the Toolbox with hard-coded
     * Linux paths, runs an XTZ import on a hard-coded import.xml file and
     * prints the result and the elapsed time in milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        long start = System.currentTimeMillis()

        String userHome = System.getProperty("user.home")
        Toolbox.setParam(Toolbox.INSTALL_DIR,new File("/usr/lib/TXM"));
        //Toolbox.setParam(Toolbox.INSTALL_DIR,new File("C:\\Program Files\\TXM"));//For Windows
        Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH,new File(userHome,"treetagger"));
        //Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH,new File("C:\\Program Files\\treetagger"));//for Windows
        Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File(userHome,"treetagger/models"));
        //Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File("C:\\Program Files\\treetagger\\models"));//for Windows
        Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8");
        Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ",");
        Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\"");
        Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(userHome, "TXM"));

        // alternative test corpora
        //File importParametersFile = new File("/home/mdecorde/xml/brown-for-xtz/import.xml");
        //File importParametersFile = new File("/home/mdecorde/xml/qgraalc/qgraal_cw/import.xml"); // new import
        //File importParametersFile = new File("/home/mdecorde/TXM/corpora/QGRAALXTZ/import.xml"); // corpus update
        //File importParametersFile = new File("/home/mdecorde/xml/annotation/import.xml"); // new import
        //File importParametersFile = new File("/home/mdecorde/xml/baiptest/import.xml"); // new import
        //File importParametersFile = new File("/home/mdecorde/TXM/corpora/BAIP/import.xml"); // corpus update
        File importParametersFile = new File("/home/mdecorde/xml/xtzsmall/import.xml");
        BaseParameters b = new BaseParameters(importParametersFile);
        b.getKeyValueParameters().put(ImportKeys.CLEAN, "false")
        b.getKeyValueParameters().put(ImportKeys.MULTITHREAD, "false")
        b.getKeyValueParameters().put(ImportKeys.DEBUG, "true")
        b.getKeyValueParameters().put(ImportKeys.UPDATECORPUS, "false")

        XTZImport i = new XTZImport(b);
        i.start();

        println "Done: "+i.isSuccessful

        long end = System.currentTimeMillis()
        println "TIME: "+(end-start)
    }
}