Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / tigersearch / BuildTTFile.groovy @ 479

History | View | Annotate | Download (4.5 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $
25
// $LastChangedRevision: 3400 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.scripts.tigersearch;
29

    
30
import java.text.DateFormat;
31
import java.util.Date;
32
import java.util.ArrayList;
33
import javax.xml.stream.*;
34
import java.net.URL;
35
import org.txm.importer.filters.*;
36

    
37
/**
 * Builds the TreeTagger (".tt") source file for TIGERSearch from an XML document.
 *
 * The XML document is read with a StAX parser. For every start tag whose local
 * name equals the target element name, the value of its "word" attribute is
 * written on its own line. Every "s" element is rendered as a "&lt;s&gt;" line
 * when it opens and a "&lt;/s&gt;" line when it closes.
 *
 * An instance is single-use: {@link #process(File, String)} consumes the parser
 * and releases every resource when it finishes.
 *
 * @author mdecorde
 */
public class BuildTTFile {

	/** Location of the XML document to read. */
	private URL url;

	/** Stream opened on {@link #url}; closed when processing ends. */
	private InputStream inputData;

	/** StAX factory used to create {@link #parser}. */
	private XMLInputFactory factory;

	/** StAX parser reading {@link #inputData}; null when it could not be created or was already consumed. */
	private XMLStreamReader parser;

	/** Writer on the result file; only set while {@link #process(File, String)} runs. */
	private Writer output;

	/** Tag names received by the constructor. Stored only; this class does not read them. */
	ArrayList<String> solotags;

	/**
	 * Opens the XML document and prepares the parser.
	 *
	 * A failure to open or parse the document is reported on standard output and
	 * does not throw: the instance is left without a parser and
	 * {@link #process(File, String)} will then return false.
	 *
	 * @param url the location of the XML document to read
	 * @param solotags the tag names to store in {@link #solotags}
	 */
	public BuildTTFile(URL url, ArrayList<String> solotags) {
		this.url = url;
		this.solotags = solotags;
		try {
			inputData = url.openStream();
			// NOTE(review): DTD and external entity processing keep the factory defaults;
			// restrict them if documents can come from an untrusted source.
			factory = XMLInputFactory.newInstance();
			parser = factory.createXMLStreamReader(inputData);
		} catch (XMLStreamException ex) {
			System.out.println(ex);
			releaseInput();
		} catch (IOException ex) {
			System.out.println("IOException while parsing " + url + " : " + ex);
			releaseInput();
		}
	}

	/**
	 * Opens the result file for writing, encoded in UTF-8.
	 *
	 * @param outfile the file to create or overwrite
	 * @return true if the writer was opened, false otherwise (the cause is printed)
	 */
	private boolean createOutput(File outfile) {
		try {
			output = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8"));
			return true;
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
			return false;
		}
	}

	/**
	 * Closes the parser and the input stream, reporting (not throwing) any failure.
	 * The stream is closed explicitly because closing a StAX reader does not close
	 * the source it reads from.
	 */
	private void releaseInput() {
		if (parser != null) {
			try {
				parser.close();
			} catch (XMLStreamException ex) {
				System.out.println(ex);
			}
			parser = null;
		}
		if (inputData != null) {
			try {
				inputData.close();
			} catch (IOException ex) {
				System.out.println("IOException while closing " + url + " : " + ex);
			}
			inputData = null;
		}
	}

	/**
	 * Reads the XML document and writes the TreeTagger source file.
	 *
	 * @param outfile the result file, created or overwritten
	 * @param targetbalise the local name of the elements carrying the "word" attribute
	 * @return true if the whole document was read and the result file was written
	 *         and closed without error, false otherwise
	 */
	public boolean process(File outfile, String targetbalise) {
		if (parser == null) {
			System.out.println("No XML parser available for " + url + " : nothing to process");
			return false;
		}
		if (!createOutput(outfile)) {
			return false;
		}

		boolean success = false;
		try {
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
				switch (event) {
					case XMLStreamConstants.START_ELEMENT:
						String opened = parser.getLocalName();
						// The target element is tested first, so it wins if it is itself named "s".
						if (opened == targetbalise) {
							String word = parser.getAttributeValue(null, "word");
							// NOTE(review): a missing "word" attribute is written as the literal
							// text "null" - confirm whether such tokens should be skipped instead.
							output.write(word + "\n");
						} else if (opened == "s") {
							output.write("<s>\n");
						}
						// Without this break the end-tag branch below would also run for
						// every start tag and emit a spurious "</s>" line.
						break;

					case XMLStreamConstants.END_ELEMENT:
						String closed = parser.getLocalName();
						if (closed != targetbalise && closed == "s") {
							output.write("</s>\n");
						}
						break;
				}
			}
			success = true;
		} catch (XMLStreamException ex) {
			System.out.println(ex);
		} catch (IOException ex) {
			System.out.println("IOException while parsing " + url + " : " + ex);
		} finally {
			try {
				output.close();
			} catch (IOException ex) {
				// Buffered content may not have reached the file.
				System.out.println("IOException while closing " + outfile + " : " + ex);
				success = false;
			}
			output = null;
			releaseInput();
		}
		return success;
	}

	/**
	 * Converts the "xml/beroul/beroul.xml" file of the user home directory
	 * into "beroul.tt" in the same directory.
	 *
	 * @param args the command line arguments (not used)
	 */
	public static void main(String[] args) {
		// The JVM does not expand "~", so the home directory is resolved explicitly.
		File rootDir = new File(System.getProperty("user.home"), "xml/beroul");

		// Tag names handed to the builder as its "solotags" list.
		ArrayList<String> milestones = new ArrayList<String>();
		milestones.add("tagUsage");
		milestones.add("pb");
		milestones.add("lb");
		milestones.add("catRef");

		File srcfile = new File(rootDir, "beroul.xml");
		File resultfile = new File(rootDir, "beroul.tt");
		println("identity file : " + srcfile + " to : " + resultfile);

		// File.toURL() is deprecated because it does not escape special characters.
		def builder = new BuildTTFile(srcfile.toURI().toURL(), milestones);
		if (!builder.process(resultfile, "t")) {
			println("Failed to build " + resultfile);
		}
	}
}