Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / RGAQCJ / rgaqcjLoader.groovy @ 479

History | View | Annotate | Download (4.4 kB)

1
/**
2
 * Main.
3
 *
4
 * @param args the args
5
 */
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
// 
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
// 
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
// 
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
// 
27
// 
28
// 
29
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
30
// $LastChangedRevision: 2386 $
31
// $LastChangedBy: mdecorde $ 
32
//
33
package org.txm.importer.RGAQCJ
34
;
35

    
36
import org.txm.importer.RGAQCJ.importer;
37
import org.txm.importer.RGAQCJ.compiler;
38
import org.txm.importer.bfm.pager;
39
import org.txm.objects.*;
40
import org.txm.utils.i18n.*;
41
import org.txm.*;
42

    
43
// TODO: Auto-generated Javadoc
44
/* (non-Javadoc)
45
 * @see groovy.lang.Script#run()
46
 */
47
// Import parameters. They are read from the script binding (rootDirBinding,
// langBinding, encodingBinding, modelBinding); if reading them throws, the
// script assumes it is being run by hand ("dev mode").
String userDir = System.getProperty("user.home");
String rootDir;
String lang;
String encoding;
String model;
try {
    rootDir = rootDirBinding;
    lang = langBinding;
    encoding = encodingBinding;
    model = modelBinding;
} catch (Exception e) {
    // Any exception raised while reading the binding is taken to mean "debugging".
    println "DEV MODE";
    if (!org.txm.Toolbox.isInitialized()) {
        // Hard-coded defaults for a standalone run.
        rootDir = userDir + "/xml/weblex/";
        lang = "fr";
        encoding = "UTF-8";
        model = "rgaqcj"; // not used

        // Minimal Toolbox configuration, all paths relative to the user home.
        Toolbox.workspace = new Workspace(new File(userDir, "TXM/workspaces/default.xml"));
        Toolbox.setParam(Toolbox.INSTALL_DIR, new File(userDir, "txminstall"));
        Toolbox.setParam(Toolbox.TREETAGGER_INSTALL_PATH, new File(userDir, "txminstall/treetagger"));
        Toolbox.setParam(Toolbox.TREETAGGER_MODELS_PATH, new File(userDir, "txminstall/treetagger/models"));
        Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(userDir, "TXM"));
    }
    // NOTE(review): when the Toolbox is already initialized nothing is assigned
    // here, so any variable the try block did not reach stays null - confirm
    // that this case cannot happen in practice.
}
68

    
69
// Step 1 - run the importer on the source directory.
File dir = new File(rootDir);
String basename = dir.getName(); // last path segment of the source directory

println "-- IMPORTER - Reading source files"
File prefsFile = new File(userDir, "TXM/weblexloader.prefs")
def sourceImporter = new importer()
def imported = sourceImporter.run(dir, prefsFile, basename)
if (!imported) {
    // A falsy result from the importer aborts the whole script.
    println("Stop import process")
    return;
}
80

    
81
// Step 2 - from here on rootDir and dir no longer designate the source
// directory but <USER_TXM_HOME>/corpora/<basename>.
def txmHome = Toolbox.getParam(Toolbox.USER_TXM_HOME);
File binDir = new File(txmHome, "corpora/" + basename);
rootDir = binDir.getAbsolutePath();
dir = new File(rootDir);

// This loader has no annotation step; it only reports that fact.
println "No annotation to do"

println "-- COMPILER - Building Search Engine indexes"
def indexCompiler = new compiler();
indexCompiler.setDebug();
//indexCompiler.setCwbPath("~/TXM/cwb/bin");
indexCompiler.setLang(lang);
indexCompiler.run(dir);
94

    
95

    
96
// Copy the registry file found in <rootDir>/registry (if any) to
// <USER_TXM_HOME>/registry/<basename>.
// NOTE(review): FileCopy is not among the explicit imports visible at the top
// of this script - confirm it resolves through one of the wildcard imports.
File builtRegistry = new File(rootDir + "/registry", basename);
if (builtRegistry.exists()) {
    File sharedRegistry = new File(Toolbox.getParam(Toolbox.USER_TXM_HOME), "registry/" + basename);
    FileCopy.copy(builtRegistry, sharedRegistry)
}

// Declare the base in the "default" project of the current workspace.
Workspace w = org.txm.Toolbox.workspace;
Project project = w.getProject("default")
// removeBase is called before addBase, presumably so that a previous import
// of the same corpus is replaced rather than duplicated.
project.removeBase(basename)
Base b = project.addBase(basename);
b.addDirectory(new File(rootDir, "txm"));
b.setAttribute("lang", lang)
b.propagateAttribute("lang")
108

    
109
println "-- EDITION"
// Start from an empty <rootDir>/HTML directory, with a "default" sub-directory.
new File(rootDir+"/HTML/").deleteDir()
new File(rootDir+"/HTML/").mkdir();
new File(rootDir+"/HTML/default/").mkdir();
// `files` is assigned without a declaration, so it lives in the script binding;
// in this script it is only printed.
files = new File(rootDir,"txm").listFiles();
println files

// Build one paginated HTML edition, named "default", per text of the base.
for (String textname : b.getTextsID()) {
    Text text = b.getText(textname);
    File srcfile = text.getSource();

    // Name the HTML file after the source file minus its extension. The
    // extension is cut at the last dot instead of blindly dropping the last
    // four characters, which failed on names shorter than four characters and
    // mangled names whose extension is not three letters long.
    String srcname = srcfile.getName();
    int dot = srcname.lastIndexOf(".");
    String stem = (dot > 0) ? srcname.substring(0, dot) : srcname;
    File resultfile = new File(rootDir+"/HTML", stem+".html");

    List<String> NoSpaceBefore = LangFormater.getNoSpaceBefore(lang);
    List<String> NoSpaceAfter = LangFormater.getNoSpaceAfter(lang);
    println("Building edition  : "+srcfile+" to : "+resultfile );

    // NOTE(review): 600 is presumably the page size used by the pager - confirm
    // against the pager constructor.
    def ed = new pager(srcfile, resultfile, NoSpaceBefore, NoSpaceAfter, 600);
    //ed.writecorrespondancesFile(new File(rootDir,"edition_correspondances.txt"))
    Edition editionweb = text.addEdition("default", "html", resultfile);

    // Register every page file with the index value found at the same position.
    def pageFiles = ed.getPageFiles();
    def pageIdx = ed.getIdx();
    for (int i = 0; i < pageFiles.size(); i++) {
        File f = pageFiles.get(i);
        String idx = pageIdx.get(i);
        editionweb.addPage(f, idx);
    }

//    Edition editionbp = text.addEdition("onepage", "html", resultfile);
//    editionbp.addPage(resultfile, ed.getIdx().get(0));
}

// Save the workspace once all texts have been processed.
w.save();