Statistics
| Revision:

root / tmp / org.txm.tigersearch.rcp / bin / org / txm / importer / srcmf2 / tigersearchLoader_old.groovy @ 458

History | View | Annotate | Download (7.1 kB)

1
package org.txm.importer.srcmf2
2

    
3
// srcdir
4
// txml/master.xml
5
// txml/*.xml
6
// txm/*.xml
7

    
8
// 1 import xml-txm
9
import ims.tiger.gui.tigerregistry.TIGERRegistry;
10
import ims.tiger.index.writer.IndexBuilderErrorHandler;
11
import ims.tiger.index.writer.SimpleErrorHandler;
12
import ims.tiger.index.writer.XMLIndexing;
13
import ims.tiger.system.*;
14

    
15
import javax.xml.stream.*;
16

    
17
import java.io.BufferedWriter;
18
import java.io.File;
19
import java.io.FileOutputStream;
20
import java.io.IOException;
21
import java.io.OutputStreamWriter;
22
import java.io.PrintStream;
23
import java.net.URL;
24
import java.util.HashMap;
25
import java.util.logging.FileHandler
26
import java.io.File;
27

    
28
import org.apache.log4j.BasicConfigurator;
29
import org.txm.export.ts.*;
30
import org.txm.importer.ApplyXsl2;
31
import org.txm.utils.xml.DomUtils;
32
import org.txm.importer.ValidateXml;
33
import org.txm.importer.XPathResult;
34
import org.txm.importer.xmltxm.compiler;
35
import org.txm.importer.xml.pager;
36
import org.txm.objects.*;
37
import org.txm.utils.*;
38
import org.txm.*;
39
import org.txm.scripts.teitxm.*;
40
import org.txm.metadatas.*;
41
import org.txm.utils.i18n.*;
42
import org.w3c.dom.Element
43
import org.xml.sax.SAXException;
44

    
45
import javax.xml.parsers.DocumentBuilderFactory
46

    
47
// Resolve the import parameters.
// The script reads `paramsBinding` and `monitor`, which are presumably supplied
// through the script binding by the caller (TODO confirm). If reading either
// one fails, the script switches to a standalone "DEV MODE" that loads a
// hard-coded import.xml from the user's home directory.
String userDir = System.getProperty("user.home");
boolean debug = false
def MONITOR;
BaseParameters params;
try {
        params = paramsBinding;
        MONITOR = monitor
} catch (Exception e) {
        // Typed catch: the former `catch (Exception)` declared an untyped
        // variable that merely happened to be named "Exception".
        println "DEV MODE";//exception means we debug
        debug = true
        params = new BaseParameters(new File(userDir, "xml/srcmfmadrid/import.xml"))
        params.load()
        if (!org.txm.Toolbox.isInitialized()) {
                // Minimal Toolbox configuration for a standalone run
                // (INSTALL_DIR was previously set twice with the same value).
                Toolbox.setParam(Toolbox.INSTALL_DIR, new File("/usr/lib/TXM"));
                Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH, new File(userDir, "treetagger"));
                Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH, new File(userDir, "treetagger/models"));
                Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8");
                Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ",");
                Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\"");
                Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(userDir, "TXM"));
        }
}
// Nothing can be imported without parameters.
if (params == null) { println "no parameters. Aborting"; return; }
68

    
69
// Settings taken directly from the import parameters.
String corpusname = params.getCorpusName();
String basename = params.name;
String rootDir = params.rootDir;

// DOM element of the corpus, and the attributes read from it.
Element corpusElem = params.corpora.get(corpusname);
String lang = corpusElem.getAttribute("lang");
String encoding = corpusElem.getAttribute("encoding");
boolean annotate = "true".equals(corpusElem.getAttribute("annotate"));

// The model name is simply the language code.
String model = lang

// XSL stylesheet attribute and XSLT parameters declared for this corpus.
String xsl = params.getXsltElement(corpusElem).getAttribute("xsl")
def xslParams = params.getXsltParams(corpusElem);
79

    
80
// Source directories (under rootDir) and target binary directory (under USER_TXM_HOME).
File txmSrcDir = new File(rootDir, "xml-txm");
File tigerSrcDir = new File(rootDir, "xml-ts");
File headerFile = new File(rootDir, "xml-ts/master.xml");

File binDir = new File(Toolbox.getParam(Toolbox.USER_TXM_HOME),"corpora/"+basename);

// Validate the inputs BEFORE touching binDir, so that a broken source tree
// does not wipe a previously built corpus.
if (!txmSrcDir.exists()) {
        println "Error: txm src dir does not exists: "+txmSrcDir
        return;
}
if (!tigerSrcDir.exists()) {
        println "Error: tiger src dir does not exists: "+tigerSrcDir
        return;
}

// Start from a fresh binary directory.
binDir.deleteDir();
binDir.mkdirs();
if (!binDir.exists()) {
        println "Could not create binDir "+binDir
        return;
}

File txmDir = new File(binDir,"txm/$corpusname");
txmDir.deleteDir();
txmDir.mkdirs();

// copy txm files
List<File> srcfiles = txmSrcDir.listFiles();
for (File f : srcfiles) {// check XML format, and copy file into binDir
        String fname = f.getName();
        // import settings, metadata and properties files are not corpus texts
        if (fname.equals("import.xml") || fname.equals("metadata.csv") || fname.endsWith(".properties"))
                continue;
        if (ValidateXml.test(f)) {
                FileCopy.copy(f, new File(txmDir, fname));
        } else {
                println "Won't process file "+f;
        }
}

// File.listFiles() returns an EMPTY array (not null) for an existing empty
// directory, so both cases must be tested to detect "nothing was copied".
File[] copiedFiles = txmDir.listFiles();
if (copiedFiles == null || copiedFiles.length == 0) {
        println "No txm file to process"
        return;
}
118

    
119
// Abort if the monitor (when there is one) reports a cancellation,
// otherwise report progress for the compilation step.
if (MONITOR?.isCanceled()) {
        return MONITOR.done();
}
MONITOR?.worked(25, "COMPILING")

println "-- COMPILING - Building Search Engine indexes"
println "binDir: $binDir"
println "txmDir: $txmDir"

def c = new compiler();
if (debug) {
        c.setDebug();
}
c.setLang(lang);

// Collect the regular, visible *.xml files of txmDir, in sorted order.
def files = []
for (File candidate : txmDir.listFiles()) {
        boolean isXml = candidate.getName().endsWith(".xml");
        if (isXml && candidate.isFile() && !candidate.isHidden()) {
                files << candidate;
        }
}
Collections.sort(files);

// Run the compiler on the collected files; stop the import if it fails.
boolean compiled = c.run(binDir, txmDir, basename, corpusname, files);
if (!compiled) {
        println "import process stopped";
        return;
}
136

    
137
// Abort on cancellation, otherwise report progress for the edition step.
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); }
if (MONITOR != null) MONITOR.worked(20, "EDITION")
println "-- EDITION - Building edition"

// Recreate the HTML output directory of the corpus and its "default" edition.
new File(binDir,"HTML/$corpusname").deleteDir();
new File(binDir,"HTML/$corpusname").mkdirs();
File outdir = new File(binDir,"/HTML/$corpusname/default/");
outdir.mkdirs();

// Number of files already printed on the progress line.
def second = 0
println "Paginating text: "
for (File srcfile : files) {
        // Text name = file name without its extension.
        String txtname = srcfile.getName();
        int dot = txtname.lastIndexOf(".");
        if (dot > 0) txtname = txtname.substring(0, dot);

        List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
        List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
        Element text = params.addText(params.corpora.get(corpusname), txtname, srcfile);

        // Progress line: names separated by ", ", with a line break every
        // 5 names. The counter used to be incremented only inside a condition
        // that required it to be already > 0, so it stayed at 0 forever and no
        // separator was ever printed.
        if (second > 0) {
                print(", ")
                if (second % 5 == 0) println ""
        }
        second++
        print(srcfile.getName());

        // Paginate the text and register the edition with one page entry per
        // produced page file (pages are numbered from 1).
        def ed = new pager(srcfile, outdir, txtname, NoSpaceBefore, NoSpaceAfter, 500, basename, "pb");
        Element edition = params.addEdition(text, "default", outdir.getAbsolutePath(), "html");
        for (int p = 0 ; p < ed.getPageFiles().size() ; p++) {
                String wordid = ed.getIdx().get(p);
                params.addPage(edition, ""+(p + 1), wordid);
        }
}
165

    
166
// Abort on cancellation, otherwise report progress for the final step.
if (MONITOR?.isCanceled()) {
        return MONITOR.done();
}
MONITOR?.worked(20, "FINALIZING")

// Save the parameters document as import.xml inside the binary directory.
File paramFile = new File(binDir, "import.xml");
DomUtils.save(params.root.getOwnerDocument(), paramFile);

// Undeclared name: in a Groovy script this assignment goes to the script
// binding (presumably read back by the caller — TODO confirm).
readyToLoad = true;
170

    
171

    
172
// TMR Modifications start here
// Directory for the TIGERSearch data, and the master file of the TIGER sources.
File tigerDir = new File(binDir, "tiger");
tigerDir.mkdir();
File master = new File(tigerSrcDir, "master.xml");

// Create the tigersearch.logprop file (used to launch TIGERSearch).
// A single-quoted multi-line string is used so that ${user.home} is written
// literally, without having to escape the dollar sign.
File logprop = new File(tigerDir, "tigersearch.logprop");
logprop.withWriter("UTF-8") { out ->
        out.write('''# Default log configuration of the TIGERSearch suite
log4j.rootLogger=WARN,Logfile
log4j.logger.ims.tiger.gui.tigersearch.TIGERSearch=INFO
log4j.appender.Logfile=org.apache.log4j.RollingFileAppender
log4j.appender.Logfile.File=${user.home}/tigersearch/tigersearch.log
log4j.appender.Logfile.MaxFileSize=500KB
log4j.appender.Logfile.MaxBackupIndex=1
log4j.appender.Logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.Logfile.layout.ConversionPattern=%5r %-5p [%t] %c{2} - %m%n''')
}
190

    
191
// Run TigerRegistry
// Abort first if the monitor (when there is one) reports a cancellation.
if (MONITOR?.isCanceled()) {
        return MONITOR.done();
}

BasicConfigurator.configure();

// The index is built from the master file into <tigerDir>/<corpusname>.
String uri = master.getAbsolutePath();
File tigerBinDir = new File(tigerDir, corpusname)
tigerBinDir.mkdir()
try {
        String indexPath = tigerBinDir.getAbsolutePath();
        IndexBuilderErrorHandler handler = new SimpleErrorHandler(indexPath);
        XMLIndexing indexing = new XMLIndexing(corpusname, uri, indexPath, handler, false);
        indexing.startIndexing();
} catch (IOException e) {
        // Only the message is reported; the script carries on afterwards.
        System.out.println("IO: "+e.getMessage());
} catch (SAXException e) {
        System.out.println("SAX: "+e.getMessage());
}