root / tmp / org.txm.tigersearch.rcp / groovy / org / txm / scripts / importer / tigersearch / tigersearchLoader.groovy @ 1000
History | View | Annotate | Download (3.2 kB)
1 | 911 | mdecorde | // Copyright © 2010-2013 ENS de Lyon.
|
---|---|---|---|
2 | 911 | mdecorde | // Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 | 911 | mdecorde | // Lyon 2, University of Franche-Comté, University of Nice
|
4 | 911 | mdecorde | // Sophia Antipolis, University of Paris 3.
|
5 | 911 | mdecorde | //
|
6 | 911 | mdecorde | // The TXM platform is free software: you can redistribute it
|
7 | 911 | mdecorde | // and/or modify it under the terms of the GNU General Public
|
8 | 911 | mdecorde | // License as published by the Free Software Foundation,
|
9 | 911 | mdecorde | // either version 2 of the License, or (at your option) any
|
10 | 911 | mdecorde | // later version.
|
11 | 911 | mdecorde | //
|
12 | 911 | mdecorde | // The TXM platform is distributed in the hope that it will be
|
13 | 911 | mdecorde | // useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 | 911 | mdecorde | // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 | 911 | mdecorde | // PURPOSE. See the GNU General Public License for more
|
16 | 911 | mdecorde | // details.
|
17 | 911 | mdecorde | //
|
18 | 911 | mdecorde | // You should have received a copy of the GNU General
|
19 | 911 | mdecorde | // Public License along with the TXM platform. If not, see
|
20 | 911 | mdecorde | // http://www.gnu.org/licenses.
|
21 | 911 | mdecorde | //
|
22 | 911 | mdecorde | // $LastChangedDate: 2015-06-03 15:04:53 +0200 (mer., 03 juin 2015) $
|
23 | 911 | mdecorde | // $LastChangedRevision: 2984 $
|
24 | 911 | mdecorde | // $LastChangedBy: mdecorde $
|
25 | 911 | mdecorde | //
|
26 | 1000 | mdecorde | package org.txm.scripts.importer.tigersearch;
|
27 | 911 | mdecorde | |
28 | 911 | mdecorde | import javax.xml.stream.XMLStreamReader; |
29 | 911 | mdecorde | |
30 | 1000 | mdecorde | import org.txm.scripts.importer.RemoveTag; |
31 | 911 | mdecorde | import org.txm.importer.ApplyXsl2; |
32 | 911 | mdecorde | import org.txm.importer.ValidateXml; |
33 | 911 | mdecorde | import org.txm.objects.*; |
34 | 911 | mdecorde | import org.txm.tokenizer.TokenizerClasses; |
35 | 911 | mdecorde | import org.txm.utils.*; |
36 | 911 | mdecorde | import org.txm.*; |
37 | 927 | mdecorde | import org.txm.importer.xmltxm.*; |
38 | 911 | mdecorde | import org.txm.utils.i18n.*; |
39 | 911 | mdecorde | import org.txm.metadatas.*; |
40 | 911 | mdecorde | import javax.xml.stream.*; |
41 | 911 | mdecorde | import org.w3c.dom.Element |
42 | 911 | mdecorde | import org.txm.utils.xml.DomUtils; |
43 | 911 | mdecorde | import org.txm.importer.xtz.* |
44 | 911 | mdecorde | |
// Entry script of the TIGERSearch import: obtains the import parameters, runs
// a TSImport on them and publishes the outcome in `readyToLoad`.
// The user's home directory is the base for the development-mode paths below.
45 | 911 | mdecorde | String userDir = System.getProperty("user.home"); |
46 | 911 | mdecorde | |
// Receives the `monitor` value when the try block below succeeds.
47 | 911 | mdecorde | def MONITOR;
|
// NOTE(review): `debug` is assigned here and in the catch block but is not
// read anywhere in the visible lines.
48 | 911 | mdecorde | boolean debug = org.txm.utils.logger.Log.isPrintingErrors();
|
49 | 911 | mdecorde | BaseParameters params; |
// `paramsBinding` and `monitor` are not declared in this script; presumably
// they are supplied by the caller through the script binding -- TODO confirm.
// When reading them throws, the catch block falls back to a development setup.
// NOTE(review): the catch clause gives no variable name; Groovy appears to
// treat `Exception` as an untyped parameter name here -- confirm.
50 | 911 | mdecorde | try {params = paramsBinding;MONITOR=monitor} catch (Exception) |
51 | 911 | mdecorde | { println "DEV MODE";//exception means we debug |
52 | 911 | mdecorde | debug = true
|
// Development fallback: parameters are loaded from a fixed file under the
// user's home directory.
53 | 911 | mdecorde | params = new BaseParameters(new File(userDir, "xml/roland/import.xml")) |
54 | 911 | mdecorde | params.load() |
// Only when the Toolbox is not initialized yet: load the tokenizer classes
// from the parameters and set the Toolbox parameters by hand. The active
// paths are Unix-style; the Windows variants are kept commented out.
55 | 911 | mdecorde | if (!org.txm.Toolbox.isInitialized()) {
|
56 | 911 | mdecorde | |
57 | 911 | mdecorde | TokenizerClasses.loadFromNode(params.getTokenizerElement(params.getCorpusElement())); |
58 | 911 | mdecorde | Toolbox.setParam(Toolbox.INSTALL_DIR,new File("/usr/lib/TXM")); |
59 | 911 | mdecorde | //Toolbox.setParam(Toolbox.INSTALL_DIR,new File("C:\\Program Files\\TXM"));//For Windows
|
60 | 911 | mdecorde | Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH,new File(userDir,"treetagger")); |
61 | 911 | mdecorde | //Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH,new File("C:\\Program Files\\treetagger"));//for Windows
|
62 | 911 | mdecorde | Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File(userDir,"treetagger/models")); |
63 | 911 | mdecorde | Toolbox.setParam(Toolbox.METADATA_ENCODING, "UTF-8");
|
64 | 911 | mdecorde | Toolbox.setParam(Toolbox.METADATA_COLSEPARATOR, ",");
|
65 | 911 | mdecorde | Toolbox.setParam(Toolbox.METADATA_TXTSEPARATOR, "\"");
|
66 | 911 | mdecorde | //Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH,new File("C:\\Program Files\\treetagger\\models"));//for Windows
|
67 | 911 | mdecorde | Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(System.getProperty("user.home"), "TXM")); |
68 | 911 | mdecorde | } |
69 | 911 | mdecorde | } |
70 | 911 | mdecorde | |
// Stop the script when no parameters are available.
// NOTE(review): after the try/catch above this looks reachable only if
// `paramsBinding` itself was null -- confirm.
71 | 911 | mdecorde | if (params == null) { println "no parameters. Aborting"; return; } |
72 | 911 | mdecorde | |
// Disabled overrides of individual import options, kept for manual debugging.
73 | 911 | mdecorde | //params.getKeyValueParameters().put(ImportKeys.CLEAN, "false")
|
74 | 911 | mdecorde | //params.getKeyValueParameters().put(ImportKeys.MULTITHREAD, "false")
|
75 | 911 | mdecorde | //params.getKeyValueParameters().put(ImportKeys.DEBUG, "false")
|
76 | 911 | mdecorde | //params.getKeyValueParameters().put(ImportKeys.UPDATECORPUS, "false")
|
77 | 911 | mdecorde | |
// Run the import and store its success flag in `readyToLoad`.
// `readyToLoad` is not declared in this script; presumably it is a binding
// variable read back by the caller -- TODO confirm.
78 | 911 | mdecorde | TSImport i = new TSImport(params);
|
79 | 911 | mdecorde | i.process(); |
80 | 911 | mdecorde | readyToLoad = i.isSuccessful |