Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / tigersearch / InjectAnnotations.groovy @ 479

History | View | Annotate | Download (6 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $
25
// $LastChangedRevision: 3400 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.scripts.tigersearch;
29

    
30
import java.text.DateFormat;
31
import java.util.Date;
32
import java.util.ArrayList;
33
import javax.xml.stream.*;
34
import java.net.URL;
35
import org.txm.importer.filters.*;
36

    
37
// TODO: Auto-generated Javadoc
38
/**
 * Injects annotations into ONE XML file.
 *
 * The source document is read with a StAX parser and re-serialized to an
 * output file. Every start tag whose local name is {@code t} receives an
 * extra {@code pos} attribute whose value is the second tab-separated column
 * of the next usable line of the annotation file (lines starting with
 * {@code <} are skipped).
 *
 * An instance is single-use: {@link #process(File)} consumes the parser and
 * closes every input it opened.
 *
 * NOTE(review): namespace declarations and attribute prefixes are not copied
 * to the output (only local attribute names are written) - confirm whether
 * downstream tools rely on that before changing it.
 *
 * @author mdecorde
 */
public class InjectAnnotations {

        /** The url of the XML document to read. */
        private def url;

        /** The input data: the stream opened on {@link #url}. */
        private def inputData;

        /** The factory used to create the parser. */
        private def factory;

        /** The parser reading the source document. */
        private XMLStreamReader parser;

        /** The reader over the annotation file. */
        private Reader reader;

        /** The output writer, created by {@link #createOutput(File)}. */
        private def output;

        /** The solotags: local names of the elements written as empty tags. */
        ArrayList<String> solotags;

        /** The lespos: every distinct annotation value read so far. */
        HashSet<String> lespos = new HashSet<String>();

        /**
         * Instantiates a new inject annotations.
         *
         * Errors raised while opening the inputs are printed, not thrown; in
         * that case {@link #process(File)} returns false.
         *
         * @param url the url of the XML document to annotate
         * @param annotations the annotation file (tab-separated columns, the
         *            second column is the injected value)
         * @param solotags the local names of the elements to write as empty tags
         */
        public InjectAnnotations(URL url, File annotations,
                        ArrayList<String> solotags) {
                try {
                        this.url = url;
                        this.solotags = solotags;
                        inputData = url.openStream();
                        factory = XMLInputFactory.newInstance();
                        // Without coalescing the parser may deliver one text node as
                        // several CHARACTERS events; trimming each chunk separately
                        // would then eat whitespace inside the text.
                        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
                        parser = factory.createXMLStreamReader(inputData);

                        // NOTE(review): FileReader decodes with the platform charset
                        // while the output is written as UTF-8 - confirm the encoding
                        // of the annotation files.
                        reader = new BufferedReader(new FileReader(annotations));
                } catch (XMLStreamException ex) {
                        System.out.println(ex);
                        releaseInputs();
                } catch (IOException ex) {
                        System.out.println("IOException while parsing " + ex);
                        releaseInputs();
                }
        }

        /**
         * Creates the output.
         *
         * @param outfile the file to write, encoded as UTF-8
         * @return true, if successful
         */
        private boolean createOutput(File outfile) {
                try {
                        output = new OutputStreamWriter(new FileOutputStream(outfile), "UTF-8");
                        return true;
                } catch (Exception e) {
                        System.out.println(e.getLocalizedMessage());
                        return false;
                }
        }

        /**
         * Gets the next annotation: the second tab-separated column of the next
         * line that does not start with {@code <}. The value is also recorded
         * in {@link #lespos}.
         *
         * NOTE(review): a line whose first column itself starts with {@code <}
         * is skipped like a tag line - confirm this cannot happen in the
         * annotation files.
         *
         * @return the next annotation
         * @throws IOException if the annotation file is exhausted, cannot be
         *             read, or the line has fewer than two columns
         */
        private String getNextAnnotation() throws IOException {
                String line = reader.readLine();
                while (line != null && line.startsWith("<"))
                        line = reader.readLine();
                if (line == null)
                        throw new IOException("No more annotations: the annotation file has fewer lines than the document has <t> elements");

                String[] columns = line.split("\t");
                if (columns.length < 2)
                        throw new IOException("Malformed annotation line, expected at least 2 tab-separated columns: " + line);

                String pos = columns[1];
                lespos.add(pos);
                return pos;
        }

        /**
         * Process: copies the source document to {@code outfile}, adding a
         * {@code pos} attribute to every {@code t} element.
         *
         * Comments, processing instructions and whitespace around text nodes
         * are not copied. All inputs and the output are closed before
         * returning, whatever the outcome.
         *
         * @param outfile the outfile
         * @return true, if successful
         */
        public boolean process(File outfile) {
                if (parser == null || reader == null) {
                        // The constructor already reported why the inputs are missing.
                        System.out.println("Cannot process " + url + ": inputs were not opened");
                        return false;
                }
                if (!createOutput(outfile))
                        return false;

                boolean success = false;
                String lastopenlocalname = "";
                try {
                        for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                                switch (event) {
                                case XMLStreamConstants.START_ELEMENT:
                                        lastopenlocalname = parser.getLocalName();
                                        writeStartElement();
                                        break;

                                case XMLStreamConstants.END_ELEMENT:
                                        writeEndElement(lastopenlocalname);
                                        break;

                                case XMLStreamConstants.CHARACTERS:
                                        // getText() returns the text with entities resolved, so
                                        // markup characters must be escaped again.
                                        output.write(escapeXml(parser.getText().trim(), false));
                                        break;
                                }
                        }
                        // Closed here (not only in finally) so that a failed flush is
                        // reported as a failure.
                        output.close();
                        success = true;
                } catch (XMLStreamException ex) {
                        System.out.println(ex);
                } catch (IOException ex) {
                        System.out.println("IOException while parsing " + inputData + " : " + ex);
                } finally {
                        closeQuietly(output);
                        releaseInputs();
                }
                return success;
        }

        /**
         * Writes the start tag the parser is positioned on, on a new line,
         * adding the {@code pos} attribute when the element is a {@code t}.
         *
         * @throws IOException if writing fails or no annotation is available
         */
        private void writeStartElement() throws IOException {
                String localname = parser.getLocalName();
                output.write("\n<" + currentPrefix() + localname);

                for (int i = 0; i < parser.getAttributeCount(); i++)
                        output.write(" " + parser.getAttributeLocalName(i)
                                        + "=\"" + escapeXml(parser.getAttributeValue(i), true)
                                        + "\"");

                // get annotation
                if (localname.equals("t"))
                        output.write(" pos=\"" + escapeXml(getNextAnnotation(), true) + "\"");

                if (solotags.contains(localname))
                        output.write("/>");
                else
                        output.write(">");
        }

        /**
         * Writes the end tag the parser is positioned on. Nothing is written
         * for solotags (their start tag was already closed with {@code />}).
         * The tag stays on the current line when it closes the element that
         * was opened last, otherwise it starts a new line.
         *
         * @param lastopenlocalname the local name of the last start tag read
         * @throws IOException if writing fails
         */
        private void writeEndElement(String lastopenlocalname) throws IOException {
                String localname = parser.getLocalName();
                if (solotags.contains(localname))
                        return;

                String separator = lastopenlocalname.equals(localname) ? "" : "\n";
                output.write(separator + "</" + currentPrefix() + localname + ">");
        }

        /**
         * Gets the prefix of the current element followed by a colon.
         *
         * @return the prefix with its colon, or an empty string when the
         *         element has no prefix
         */
        private String currentPrefix() {
                String prefix = parser.getPrefix();
                if (prefix == null || prefix.isEmpty())
                        return "";
                return prefix + ":";
        }

        /**
         * Escapes the characters that cannot appear literally in XML content.
         *
         * @param value the raw value
         * @param inAttribute true to also escape double quotes
         * @return the escaped value
         */
        private static String escapeXml(String value, boolean inAttribute) {
                // '&' first, otherwise the entities produced below would be escaped again.
                String escaped = value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
                if (inAttribute)
                        escaped = escaped.replace("\"", "&quot;");
                return escaped;
        }

        /**
         * Closes the parser, the annotation reader and the source stream.
         * XMLStreamReader.close() does not close the underlying stream, so the
         * stream is closed explicitly.
         */
        private void releaseInputs() {
                closeQuietly(parser);
                closeQuietly(reader);
                closeQuietly(inputData);
        }

        /**
         * Closes a resource, ignoring a null resource and any error on close.
         *
         * @param resource an object with a close() method, or null
         */
        private static void closeQuietly(def resource) {
                try {
                        if (resource != null)
                                resource.close();
                } catch (Exception ignored) {
                        // best effort: there is nothing useful to do if close() fails
                }
        }

        /**
         * Gets the feature: writes a {@code feature} declaration named
         * {@code pos} listing every annotation value read so far.
         *
         * @param f the file to write, encoded as UTF-8
         */
        public void getFeature(File f)
        {
                Writer writer = new OutputStreamWriter(new FileOutputStream(f) , "UTF-8");
                try {
                        writer.write("<feature name=\"pos\" domain=\"T\">\n");
                        for (String pos : lespos)
                                writer.write("<value name=\"" + escapeXml(pos, true) + "\"></value>\n");
                        writer.write("</feature>\n");
                } finally {
                        writer.close();
                }
        }

        /**
         * The main method: sample run on the "beroul" files found in the
         * {@code xml/beroul} directory of the user home.
         *
         * @param args the arguments (unused)
         */
        public static void main(String[] args) {

                // "~" is a shell shortcut that Java does not expand: build the
                // path from the user.home system property instead.
                String rootDir = System.getProperty("user.home") + "/xml/beroul/";
                new File(rootDir, "identity").mkdirs();

                // the tags that must stay milestones (written as empty elements)
                ArrayList<String> milestones = new ArrayList<String>();
                milestones.add("tagUsage");
                milestones.add("pb");
                milestones.add("lb");
                milestones.add("catRef");

                File srcfile = new File(rootDir, "beroul.xml");
                File annotationsfiles = new File(rootDir, "result.tt");
                File resultfile = new File(rootDir, "beroul-result.xml");
                println("identity file : " + srcfile + " to : " + resultfile);

                // File.toURL() is deprecated: it does not escape special characters.
                def builder = new InjectAnnotations(srcfile.toURI().toURL(), annotationsfiles,
                                milestones);
                builder.process(resultfile);

                return;
        }

}