Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / importer / cqp / compiler.groovy @ 1000

History | View | Annotate | Download (5.4 kB)

1

    
2

    
3
// Copyright © 2010-2013 ENS de Lyon.
4
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
5
// Lyon 2, University of Franche-Comté, University of Nice
6
// Sophia Antipolis, University of Paris 3.
7
//
8
// The TXM platform is free software: you can redistribute it
9
// and/or modify it under the terms of the GNU General Public
10
// License as published by the Free Software Foundation,
11
// either version 2 of the License, or (at your option) any
12
// later version.
13
//
14
// The TXM platform is distributed in the hope that it will be
15
// useful, but WITHOUT ANY WARRANTY; without even the implied
16
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17
// PURPOSE. See the GNU General Public License for more
18
// details.
19
//
20
// You should have received a copy of the GNU General
21
// Public License along with the TXM platform. If not, see
22
// http://www.gnu.org/licenses.
23
//
24
//
25
//
26
// $LastChangedDate: 2012-01-05 14:27:34 +0100 (jeu., 05 janv. 2012) $
27
// $LastChangedRevision: 2096 $
28
// $LastChangedBy: mdecorde $
29
//
30
package org.txm.scripts.importer.cqp
31

    
32
import org.txm.Toolbox;
33
import org.txm.importer.cwb.*
34
import org.txm.scripts.importer.*;
35
import org.txm.scripts.*;
36
import org.txm.importer.scripts.xmltxm.*;
37
import org.txm.utils.io.FileCopy;
38
import org.txm.utils.treetagger.TreeTagger;
39
import java.util.ArrayList;
40
import javax.xml.stream.*;
41
import java.net.URL;
42
import java.io.File;
43
import java.util.HashMap;
44
import java.util.List;
45

    
46
// TODO: Auto-generated Javadoc
47
/**
 * Builds the CWB indexes of a corpus from a CQP source file.
 *
 * The source CQP file is copied into the "cqp" sub-directory of a binary
 * directory, then handed to the {@code CwbEncode} and {@code CwbMakeAll}
 * wrappers. The positional and structural attribute names are read from the
 * source registry file when one is given; otherwise they are derived from the
 * content of the CQP file itself.
 */
class compiler
{
        /** The debug flag, forwarded to the CwbEncode and CwbMakeAll wrappers. */
        boolean debug = false;

        /** The dir (not read by the code of this class). */
        private def dir;

        /** The CQP file to compile and the optional registry file (may be null). */
        File srcCQPFile, srcRegistryFile;

        /** Positional attribute names used by the last call to run(). */
        public def pAttributesList = [];
        /** Structural attribute declarations used by the last call to run(). */
        public def sAttributesList = [];

        /**
         * Instantiates a new compiler.
         *
         * @param cqpFile the source CQP file
         * @param registryFile the source registry file, or null to derive the
         *        attributes from the CQP file
         */
        public compiler(File cqpFile, File registryFile) {
                this.srcCQPFile = cqpFile;
                this.srcRegistryFile = registryFile
        }

        /**
         * Enables the debug mode.
         *
         * @return the value of the assignment (true)
         */
        public setDebug()
        {
                debug =true;
        }

        /**
         * Run.
         *
         * Recreates the "cqp" and "data" sub-directories of binDir, makes sure
         * the "registry" sub-directory exists, copies the source CQP file to
         * "cqp/&lt;corpusname&gt;.cqp", resolves the attribute lists, then calls
         * CwbEncode and CwbMakeAll.
         *
         * @param binDir the binary directory of the corpus; must exist
         * @param corpusname the corpus name (lower-cased for the registry file name)
         * @return true, if successful
         */
        public boolean run(File binDir, String corpusname)
        {
                if (!(CwbEncode.isExecutableAvailable() && CwbMakeAll.isExecutableAvailable())) {
                        println ("Error: CWB executables not well set.")
                        return false;
                }
                if (!binDir.exists()) {
                        println ("binary directory does not exists: "+binDir)
                        return false;
                }

                // a null source file is reported like a missing one instead of raising a NullPointerException
                if (srcCQPFile == null || !srcCQPFile.exists()) {
                        println "Error: cannot find the CQP file $srcCQPFile"
                        return false;
                }

                File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp");
                new File(binDir, "cqp").deleteDir();
                new File(binDir, "cqp").mkdir();
                new File(binDir, "data").deleteDir();
                new File(binDir, "data").mkdir();
                new File(binDir, "registry").mkdir();

                FileCopy.copy(srcCQPFile, cqpFile);

                //2- Import into CWB
                def outDir = binDir.getAbsolutePath()+"/";

                CwbEncode cwbEn = new CwbEncode();
                CwbMakeAll cwbMa = new CwbMakeAll();
                cwbEn.setDebug(debug);
                cwbMa.setDebug(debug);

                boolean attributesFound;
                if (srcRegistryFile == null) {
                        attributesFound = findAttributesInCQP(cqpFile);
                } else {
                        attributesFound = readAttributesFromRegistry();
                }
                if (!attributesFound) {
                        return false; // the helper already printed the reason
                }

                String[] pAttributes;
                String[] sAttributes;
                pAttributes = pAttributesList; // cast to array
                sAttributes = sAttributesList; // cast to array

                println "pAttrs : "+Arrays.toString(pAttributes)
                println "sAttrs : "+Arrays.toString(sAttributes)

                try {
                        String regPath =outDir + "/registry/"+corpusname.toLowerCase()
                        cwbEn.run(outDir + "/data/$corpusname",
                                        cqpFile.getAbsolutePath(),
                                        regPath, pAttributes, sAttributes);
                        if (!new File(regPath).exists()) {
                                println "Error: The registry file was not created: $regPath. See https://groupes.renater.fr/wiki/txm-users/public/faq"
                                return false;
                        }
                        cwbMa.run(corpusname, outDir + "/registry");
                        return true;
                } catch (Exception ex) {
                        System.out.println(ex);
                        if (debug) ex.printStackTrace(); // full trace only in debug mode
                        return false;
                }
        }

        /**
         * Derives the attribute lists from the CQP file when no registry file
         * was given.
         *
         * The structural attributes come from BuildCwbEncodeArgsFromCQP. The
         * CQP file is rewritten with a "w_N" identifier appended, after a tab,
         * to every line that does not start with "&lt;"; the positional
         * attributes are then named "p1".."pN-1" followed by "id", N being the
         * number of tab-separated columns of the first such line.
         *
         * @param cqpFile the working copy of the CQP file, rewritten in place
         * @return false if the file could not be rewritten or contains no word line
         */
        private boolean findAttributesInCQP(File cqpFile)
        {
                println "WARNING: No registry file found in source directory"
                println "We'll search for positional attributes and structural attributes in the CQP file"

                // s attributes
                BuildCwbEncodeArgsFromCQP argsgetter = new BuildCwbEncodeArgsFromCQP(); // XML stream
                argsgetter.process(cqpFile); //$NON-NLS-1$
                sAttributesList = argsgetter.getSAttributes();

                // p attributes
                int nbAttr = -1;
                File tmp = File.createTempFile("txm", ".cqp", cqpFile.getParentFile());
                int wcounter = 1;
                println "Adding the 'id' property to the CQP file and getting word properties number."
                // withWriter flushes and closes the writer itself: no flush per line needed
                tmp.withWriter("UTF-8") { writer ->
                        cqpFile.eachLine("UTF-8") { line ->
                                if (!line.startsWith("<")) {
                                        if (nbAttr == -1) nbAttr = line.split("\t").size();
                                        writer.println(line+"\tw_"+(wcounter++))
                                } else {
                                        writer.println(line)
                                }
                        }
                }
                cqpFile.delete()
                // renameTo reports failure through its return value only
                if (!tmp.renameTo(cqpFile)) {
                        println "Error: cannot replace the CQP file $cqpFile with $tmp"
                        return false;
                }

                // nbAttr is still -1 when every line starts with "<" (or the file is empty)
                if (nbAttr == -1) {
                        println "Error: no word line found in the CQP file $cqpFile"
                        return false;
                }

                System.out.println("Found "+(nbAttr-1)+" word properties, $nbAttr with the 'id'");
                // start from a fresh list so that calling run() again does not duplicate the names
                pAttributesList = [];
                for (int i = 1; i < nbAttr ; i++) {
                        pAttributesList << "p$i";
                }
                pAttributesList << "id"
                return true;
        }

        /**
         * Reads the attribute lists from the source registry file.
         *
         * The first positional attribute is dropped (the original code labels
         * it "word") and the remaining ones must contain "id".
         *
         * @return false if the registry file does not declare the 'id' word property
         */
        private boolean readAttributesFromRegistry()
        {
                ReadRegistryFile reader = new ReadRegistryFile(srcRegistryFile);
                pAttributesList = reader.getPAttributes();
                sAttributesList = reader.getSAttributes();
                // guard: remove(0) on an empty list would throw instead of reporting the error below
                if (pAttributesList) pAttributesList.remove(0) // remove word

                if (!pAttributesList?.contains("id")) {
                        System.out.println("Error: The registry file does not declare the 'id' word property");
                        return false;
                }
                return true;
        }

        /**
         * The main method.
         *
         * @param args the arguments: the CQP file, the binary directory, the
         *        corpus name and, optionally, the registry file
         */
        public static void main(String[] args)
        {
                if (args == null || args.length < 3) {
                        println "Usage: compiler <cqp file> <binary directory> <corpus name> [registry file]"
                        return;
                }

                File cqpFile = new File(args[0]);
                File binDir = new File(args[1]);
                String corpusname = args[2];
                File registryFile = (args.length > 3) ? new File(args[3]) : null;

                if (!new compiler(cqpFile, registryFile).run(binDir, corpusname)) {
                        println "Error: the compilation of the '$corpusname' corpus failed"
                }
        }
}