Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / tigersearch / InjectAnnotations.groovy @ 1688

History | View | Annotate | Download (6.1 kB)

1 321 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 321 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 321 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 321 mdecorde
// Sophia Antipolis, University of Paris 3.
5 321 mdecorde
//
6 321 mdecorde
// The TXM platform is free software: you can redistribute it
7 321 mdecorde
// and/or modify it under the terms of the GNU General Public
8 321 mdecorde
// License as published by the Free Software Foundation,
9 321 mdecorde
// either version 2 of the License, or (at your option) any
10 321 mdecorde
// later version.
11 321 mdecorde
//
12 321 mdecorde
// The TXM platform is distributed in the hope that it will be
13 321 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 321 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 321 mdecorde
// PURPOSE. See the GNU General Public License for more
16 321 mdecorde
// details.
17 321 mdecorde
//
18 321 mdecorde
// You should have received a copy of the GNU General
19 321 mdecorde
// Public License along with the TXM platform. If not, see
20 321 mdecorde
// http://www.gnu.org/licenses.
21 321 mdecorde
//
22 321 mdecorde
//
23 321 mdecorde
//
24 479 mdecorde
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $
25 321 mdecorde
// $LastChangedRevision: 3400 $
26 321 mdecorde
// $LastChangedBy: mdecorde $
27 321 mdecorde
//
28 1000 mdecorde
package org.txm.scripts.scripts.tigersearch;
29 321 mdecorde
30 321 mdecorde
import java.text.DateFormat;
31 321 mdecorde
import java.util.Date;
32 321 mdecorde
import java.util.ArrayList;
33 321 mdecorde
import javax.xml.stream.*;
34 321 mdecorde
import java.net.URL;
35 1000 mdecorde
import org.txm.importer.scripts.filters.*;
36 321 mdecorde
37 321 mdecorde
// TODO: Auto-generated Javadoc
38 321 mdecorde
/**
 * Copies ONE XML document to a new file and injects a {@code pos} attribute
 * into every {@code <t>} element it meets.
 *
 * The {@code pos} values are read sequentially from a tab-separated
 * annotations file: lines starting with {@code <} are skipped and the second
 * column of each remaining line is used, one line per {@code <t>} element in
 * document order.
 *
 * Only start tags, end tags and character data are copied; other events
 * (comments, processing instructions, ...) are not written. Character data is
 * trimmed before being written.
 *
 * An instance is single-use: {@link #process(File)} closes the inputs opened
 * by the constructor.
 *
 * @author mdecorde
 */
public class InjectAnnotations {

	/** The URL of the XML document to copy. */
	private def url;

	/** The stream opened on {@link #url}. */
	private def inputData;

	/** The StAX factory used to create {@link #parser}. */
	private def factory;

	/** The StAX parser reading {@link #inputData}; null if opening failed. */
	private XMLStreamReader parser;

	/** The reader on the annotations file; null if opening failed. */
	private BufferedReader reader;

	/** The writer on the result file, set by {@link #createOutput(File)}. */
	private def output;

	/** Local names of the elements written as empty tags ({@code <x/>}). */
	ArrayList<String> solotags;

	/** Every distinct pos value injected so far. */
	HashSet<String> lespos = new HashSet<String>();

	/**
	 * Opens the XML document and the annotations file.
	 *
	 * Failures are printed on standard output, as before; the instance is then
	 * left without inputs and {@link #process(File)} returns false.
	 *
	 * @param url the URL of the XML document to copy
	 * @param annotations the tab-separated annotations file
	 * @param solotags local names of the elements to write as empty tags;
	 *        null is treated as an empty list
	 */
	public InjectAnnotations(URL url, File annotations,
			ArrayList<String> solotags) {
		this.url = url;
		this.solotags = (solotags != null) ? solotags : new ArrayList<String>();
		try {
			inputData = url.openStream();
			factory = XMLInputFactory.newInstance();
			// Deliver each text node as ONE CHARACTERS event: process() trims
			// each event, so split events would lose their inner spaces.
			factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
			// NOTE(review): DTD / external entity support is left at the
			// factory defaults; restrict it if documents may be untrusted.
			parser = factory.createXMLStreamReader(inputData);

			// NOTE(review): the annotations file is read as UTF-8, the
			// encoding used for the output - confirm against the tagger.
			reader = new BufferedReader(new InputStreamReader(
					new FileInputStream(annotations), "UTF-8"));
		} catch (XMLStreamException ex) {
			System.out.println(ex);
			closeInputs();
		} catch (IOException ex) {
			System.out.println("IOException while parsing " + url + ": " + ex);
			closeInputs();
		}
	}

	/**
	 * Opens the result file as a UTF-8 writer.
	 *
	 * @param outfile the result file
	 * @return true, if the writer could be opened
	 */
	private boolean createOutput(File outfile) {
		try {
			output = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(outfile), "UTF-8"));
			return true;
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
			return false;
		}
	}

	/**
	 * Reads the next annotation and records it in {@link #lespos}.
	 *
	 * @return the second tab-separated column of the next line that does not
	 *         start with {@code <}
	 * @throws IOException if the annotations file is exhausted or if the line
	 *         has fewer than two columns
	 */
	private String getNextAnnotation() throws IOException {
		String line = reader.readLine();
		while (line != null && line.startsWith("<")) {
			line = reader.readLine();
		}
		if (line == null) {
			throw new EOFException("no annotation left for a <t> element of " + url);
		}
		String[] columns = line.split("\t");
		if (columns.length < 2) {
			throw new IOException("malformed annotation line (expected at least 2 tab-separated columns): " + line);
		}
		String pos = columns[1];
		lespos.add(pos);
		return pos;
	}

	/**
	 * Copies the document to {@code outfile}, injecting the annotations.
	 *
	 * The inputs opened by the constructor are always closed before this
	 * method returns, whatever the outcome.
	 *
	 * @param outfile the result file
	 * @return true, if the whole document was copied and the result file was
	 *         closed without error
	 */
	public boolean process(File outfile) {
		boolean success = false;
		try {
			if (parser == null || reader == null) {
				System.out.println("Cannot process " + url + ": the inputs could not be opened");
				return false;
			}
			if (!createOutput(outfile)) {
				return false;
			}
			try {
				copyEvents();
				// closing flushes the writer, so a failure here is a real failure
				output.close();
				success = true;
			} catch (XMLStreamException ex) {
				System.out.println(ex);
			} catch (IOException ex) {
				System.out.println("IOException while parsing " + url + ": " + ex);
			} finally {
				if (!success) {
					closeQuietly(output);
				}
			}
		} finally {
			closeInputs();
		}
		return success;
	}

	/**
	 * Reads the parser up to the end of the document and writes start tags,
	 * end tags and trimmed character data to {@link #output}.
	 */
	private void copyEvents() throws XMLStreamException, IOException {
		String lastOpened = "";
		while (parser.hasNext()) {
			int event = parser.next();
			if (event == XMLStreamConstants.START_ELEMENT) {
				lastOpened = parser.getLocalName();
				writeStartTag(lastOpened);
			} else if (event == XMLStreamConstants.END_ELEMENT) {
				String localname = parser.getLocalName();
				// solo tags were already closed by their start tag
				if (!solotags.contains(localname)) {
					// an element without child elements is closed on the same line
					String lead = lastOpened.equals(localname) ? "</" : "\n</";
					output.write(lead + qualifiedName(parser.getPrefix(), localname) + ">");
				}
			} else if (event == XMLStreamConstants.CHARACTERS) {
				output.write(escapeXml(parser.getText().trim()));
			}
		}
	}

	/**
	 * Writes the start tag of the current element: its name, its namespace
	 * declarations, its attributes and, for {@code t} elements, the injected
	 * {@code pos} attribute.
	 *
	 * @param localname the local name of the current element
	 */
	private void writeStartTag(String localname) throws IOException {
		boolean annotated = localname.equals("t");
		output.write("\n<" + qualifiedName(parser.getPrefix(), localname));

		// the parser does not report xmlns declarations as attributes; without
		// them the prefixes written below would be undeclared
		for (int i = 0; i < parser.getNamespaceCount(); i++) {
			String declared = parser.getNamespacePrefix(i);
			String name = (declared == null || declared.isEmpty()) ? "xmlns" : "xmlns:" + declared;
			output.write(" " + name + "=\"" + escapeXml(parser.getNamespaceURI(i)) + "\"");
		}

		for (int i = 0; i < parser.getAttributeCount(); i++) {
			String attPrefix = parser.getAttributePrefix(i);
			String attName = parser.getAttributeLocalName(i);
			boolean unprefixed = (attPrefix == null || attPrefix.isEmpty());
			// the injected value replaces an existing pos attribute; writing
			// both would produce a duplicate attribute
			if (annotated && unprefixed && attName.equals("pos")) {
				continue;
			}
			output.write(" " + qualifiedName(attPrefix, attName) + "=\""
					+ escapeXml(parser.getAttributeValue(i)) + "\"");
		}

		if (annotated) {
			output.write(" pos=\"" + escapeXml(getNextAnnotation()) + "\"");
		}
		output.write(solotags.contains(localname) ? "/>" : ">");
	}

	/**
	 * Builds {@code prefix:localname}, or {@code localname} alone when the
	 * prefix is null or empty.
	 */
	private static String qualifiedName(String prefix, String localname) {
		if (prefix == null || prefix.isEmpty()) {
			return localname;
		}
		return prefix + ":" + localname;
	}

	/**
	 * Escapes the characters that cannot appear as-is in XML text or in a
	 * double-quoted attribute value. Null yields the empty string.
	 */
	private static String escapeXml(String text) {
		if (text == null) {
			return "";
		}
		return text.replace("&", "&amp;").replace("<", "&lt;")
				.replace(">", "&gt;").replace("\"", "&quot;");
	}

	/** Closes the parser, the document stream and the annotations reader. */
	private void closeInputs() {
		if (parser != null) {
			try {
				parser.close();
			} catch (XMLStreamException ignored) {
				// nothing useful can be done about a failed close
			}
			parser = null;
		}
		closeQuietly(inputData);
		inputData = null;
		closeQuietly(reader);
		reader = null;
	}

	/** Closes a resource if it is not null, ignoring a failure to close. */
	private static void closeQuietly(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException ignored) {
			// cleanup only: the outcome was already decided by the caller
		}
	}

	/**
	 * Writes the pos values injected so far as a {@code <feature>}
	 * declaration, one {@code <value>} element per distinct value, sorted.
	 *
	 * @param f the file to write, encoded in UTF-8
	 */
	public void getFeature(File f)
	{
		Writer writer = new BufferedWriter(new OutputStreamWriter(
				new FileOutputStream(f), "UTF-8"));
		try {
			writer.write("<feature name=\"pos\" domain=\"T\">\n");
			// sorted so that the file does not depend on HashSet iteration order
			for (String pos : new TreeSet<String>(lespos)) {
				writer.write("<value name=\"" + escapeXml(pos) + "\"></value>\n");
			}
			writer.write("</feature>\n");
		} finally {
			writer.close();
		}
	}

	/**
	 * Sample run on the files of the {@code xml/beroul} directory of the user
	 * home.
	 *
	 * @param args the arguments (not used)
	 */
	public static void main(String[] args) {

		// "~" is a shell shortcut that the JVM does not expand
		File rootDir = new File(System.getProperty("user.home"), "xml/beroul");
		new File(rootDir, "identity").mkdirs();

		// the tags to keep as milestones (empty elements)
		ArrayList<String> milestones = new ArrayList<String>();
		milestones.add("tagUsage");
		milestones.add("pb");
		milestones.add("lb");
		milestones.add("catRef");

		File srcfile = new File(rootDir, "beroul.xml");
		File annotationsfiles = new File(rootDir, "result.tt");
		File resultfile = new File(rootDir, "beroul-result.xml");
		println("identity file : " + srcfile + " to : " + resultfile);

		def builder = new InjectAnnotations(srcfile.toURI().toURL(),
				annotationsfiles, milestones);
		builder.process(resultfile);
	}

}