root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / tigersearch / InjectAnnotations.groovy @ 1688
History | View | Annotate | Download (6.1 kB)
1 | 321 | mdecorde | // Copyright © 2010-2013 ENS de Lyon.
|
---|---|---|---|
2 | 321 | mdecorde | // Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 | 321 | mdecorde | // Lyon 2, University of Franche-Comté, University of Nice
|
4 | 321 | mdecorde | // Sophia Antipolis, University of Paris 3.
|
5 | 321 | mdecorde | //
|
6 | 321 | mdecorde | // The TXM platform is free software: you can redistribute it
|
7 | 321 | mdecorde | // and/or modify it under the terms of the GNU General Public
|
8 | 321 | mdecorde | // License as published by the Free Software Foundation,
|
9 | 321 | mdecorde | // either version 2 of the License, or (at your option) any
|
10 | 321 | mdecorde | // later version.
|
11 | 321 | mdecorde | //
|
12 | 321 | mdecorde | // The TXM platform is distributed in the hope that it will be
|
13 | 321 | mdecorde | // useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 | 321 | mdecorde | // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 | 321 | mdecorde | // PURPOSE. See the GNU General Public License for more
|
16 | 321 | mdecorde | // details.
|
17 | 321 | mdecorde | //
|
18 | 321 | mdecorde | // You should have received a copy of the GNU General
|
19 | 321 | mdecorde | // Public License along with the TXM platform. If not, see
|
20 | 321 | mdecorde | // http://www.gnu.org/licenses.
|
21 | 321 | mdecorde | //
|
22 | 321 | mdecorde | //
|
23 | 321 | mdecorde | //
|
24 | 479 | mdecorde | // $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $
|
25 | 321 | mdecorde | // $LastChangedRevision: 3400 $
|
26 | 321 | mdecorde | // $LastChangedBy: mdecorde $
|
27 | 321 | mdecorde | //
|
28 | 1000 | mdecorde | package org.txm.scripts.scripts.tigersearch;
|
29 | 321 | mdecorde | |
30 | 321 | mdecorde | import java.text.DateFormat; |
31 | 321 | mdecorde | import java.util.Date; |
32 | 321 | mdecorde | import java.util.ArrayList; |
33 | 321 | mdecorde | import javax.xml.stream.*; |
34 | 321 | mdecorde | import java.net.URL; |
35 | 1000 | mdecorde | import org.txm.importer.scripts.filters.*; |
36 | 321 | mdecorde | |
/**
 * Injects annotations into ONE XML file.
 *
 * The source document is read with a StAX parser and re-serialised to an
 * output file. Each time a {@code <t>} start tag is met, the next annotation
 * is read from the annotation file and written as a {@code pos} attribute.
 * An annotation is the second tab-separated column of the next line that
 * does not start with {@code <}.
 *
 * Every distinct value injected is remembered in {@link #lespos} so that
 * {@link #getFeature(File)} can later write the matching feature declaration.
 *
 * Limits of the serialisation, as implemented below:
 * <ul>
 * <li>only start tags, end tags and text are copied (comments, processing
 * instructions and the XML declaration are not written);</li>
 * <li>attributes are written with their local name only and namespace
 * declarations are not copied
 * (NOTE(review): confirm that consumers do not need them);</li>
 * <li>text nodes are trimmed before being written.</li>
 * </ul>
 *
 * @author mdecorde
 */
public class InjectAnnotations {

    /** The url of the XML source document. */
    private def url;

    /** The stream opened on {@link #url}. */
    private def inputData;

    /** The StAX factory used to create {@link #parser}. */
    private def factory;

    /** The StAX parser reading the XML source; null if it could not be opened. */
    private XMLStreamReader parser;

    /** The reader of the annotation file; null if it could not be opened. */
    private Reader reader;

    /** The writer of the result file, created by {@link #createOutput(File)}. */
    private def output;

    /** The local names of the elements written as self-closing tags. */
    ArrayList<String> solotags;

    /** The distinct annotation values injected so far. */
    HashSet<String> lespos = new HashSet<String>();

    /**
     * Instantiates a new inject annotations.
     *
     * Opening errors are reported on standard output and do not propagate;
     * in that case {@link #process(File)} returns false.
     *
     * @param url the url of the XML source document
     * @param annotations the annotation file, one tab-separated line per token
     * @param solotags the local names of the elements to write as {@code <name/>};
     *            null is treated as an empty list
     */
    public InjectAnnotations(URL url, File annotations,
            ArrayList<String> solotags) {
        this.url = url;
        this.solotags = (solotags != null) ? solotags : new ArrayList<String>();
        try {
            inputData = url.openStream();
            XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
            // Deliver each text node as a single CHARACTERS event. Without this a
            // text node may be split at entity references, and the per-event
            // trim() in process() would then drop the whitespace around them.
            xmlFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
            factory = xmlFactory;
            parser = xmlFactory.createXMLStreamReader(inputData);

            // Explicit charset: the result is written in UTF-8, so the
            // annotations must not be decoded with the platform default.
            reader = new BufferedReader(new InputStreamReader(
                    new FileInputStream(annotations), "UTF-8"));
        } catch (XMLStreamException ex) {
            System.out.println(ex);
            closeResources();
        } catch (IOException ex) {
            System.out.println("IOException while opening " + url + " or "
                    + annotations + ": " + ex);
            closeResources();
        }
    }

    /**
     * Creates the output.
     *
     * @param outfile the outfile, written in UTF-8
     * @return true, if successful
     */
    private boolean createOutput(File outfile) {
        try {
            output = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(outfile), "UTF-8"));
            return true;
        } catch (Exception e) {
            System.out.println(e.getLocalizedMessage());
            return false;
        }
    }

    /**
     * Gets the next annotation.
     *
     * Lines starting with {@code <} are skipped. The value returned is the
     * second tab-separated column of the first other line; it is also
     * recorded in {@link #lespos}.
     *
     * @return the next annotation
     * @throws IOException if the annotation file is exhausted or the line has
     *             fewer than two tab-separated columns
     */
    private String getNextAnnotation() throws IOException {
        String line = reader.readLine();
        while (line != null && line.startsWith("<")) {
            line = reader.readLine();
        }
        if (line == null) {
            throw new EOFException("No annotation left for the remaining <t> elements of " + url);
        }
        String[] columns = line.split("\t");
        if (columns.length < 2) {
            throw new IOException("Malformed annotation line (expected at least 2 tab-separated columns): " + line);
        }
        String pos = columns[1];
        lespos.add(pos);
        return pos;
    }

    /**
     * Process.
     *
     * Copies the source document to {@code outfile}, adding a {@code pos}
     * attribute to every {@code <t>} element. All the streams held by this
     * object are closed when the method returns, whatever the outcome, so an
     * instance can only be processed once.
     *
     * @param outfile the outfile
     * @return true, if successful
     */
    public boolean process(File outfile) {
        boolean success = false;
        try {
            if (parser == null || reader == null) {
                System.out.println("Cannot inject annotations: " + url
                        + " or its annotation file could not be opened");
                return false;
            }
            if (!createOutput(outfile)) {
                return false;
            }

            // Local name of the most recently opened element: an end tag that
            // closes it directly is written on the same line.
            String lastopenlocalname = "";
            for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
                switch (event) {
                    case XMLStreamConstants.START_ELEMENT:
                        lastopenlocalname = parser.getLocalName();
                        writeStartElement();
                        break;

                    case XMLStreamConstants.END_ELEMENT:
                        writeEndElement(lastopenlocalname);
                        break;

                    case XMLStreamConstants.CHARACTERS:
                        // The parser returns decoded text, so it must be
                        // escaped again to keep the output well-formed.
                        output.write(escapeXml(parser.getText().trim(), false));
                        break;
                }
            }
            // Closed here so that a flush failure is reported as an error.
            output.close();
            success = true;
        } catch (XMLStreamException ex) {
            System.out.println(ex);
        } catch (IOException ex) {
            System.out.println("IOException while parsing " + url + ": " + ex);
        } finally {
            closeResources();
        }
        return success;
    }

    /**
     * Writes the start tag of the element the parser is positioned on,
     * followed by its attributes and, for {@code <t>}, the injected
     * {@code pos} attribute.
     */
    private void writeStartElement() throws IOException {
        String localname = parser.getLocalName();
        output.write("\n<" + currentPrefix() + localname);

        for (int i = 0; i < parser.getAttributeCount(); i++) {
            output.write(" " + parser.getAttributeLocalName(i) + "=\""
                    + escapeXml(parser.getAttributeValue(i), true) + "\"");
        }

        // get annotation
        if (localname.equals("t")) {
            output.write(" pos=\"" + escapeXml(getNextAnnotation(), true) + "\"");
        }

        output.write(solotags.contains(localname) ? "/>" : ">");
    }

    /**
     * Writes the end tag of the element the parser is positioned on, unless
     * it is a solo tag (already closed by its start tag).
     *
     * @param lastopenlocalname local name of the most recently opened element
     */
    private void writeEndElement(String lastopenlocalname) throws IOException {
        String localname = parser.getLocalName();
        if (solotags.contains(localname)) {
            return;
        }
        String lineBreak = lastopenlocalname.equals(localname) ? "" : "\n";
        output.write(lineBreak + "</" + currentPrefix() + localname + ">");
    }

    /**
     * Returns the namespace prefix of the current element followed by ":",
     * or an empty string when the element has no prefix.
     */
    private String currentPrefix() {
        String prefix = parser.getPrefix();
        return (prefix == null || prefix.isEmpty()) ? "" : prefix + ":";
    }

    /**
     * Escapes the XML markup characters of a value.
     *
     * @param value the raw value, may be null
     * @param inAttribute true to also escape double quotes
     * @return the escaped value, empty if value is null
     */
    private static String escapeXml(String value, boolean inAttribute) {
        if (value == null) {
            return "";
        }
        // '&' must be replaced first, otherwise the entities produced below
        // would be escaped a second time.
        String escaped = value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
        if (inAttribute) {
            escaped = escaped.replace("\"", "&quot;");
        }
        return escaped;
    }

    /**
     * Closes every stream held by this object, reporting (not propagating)
     * close failures, and forgets them so that they are not reused.
     */
    private void closeResources() {
        try {
            if (output != null) output.close();
        } catch (IOException ex) {
            System.out.println(ex);
        }
        try {
            if (reader != null) reader.close();
        } catch (IOException ex) {
            System.out.println(ex);
        }
        try {
            if (parser != null) parser.close();
        } catch (XMLStreamException ex) {
            System.out.println(ex);
        }
        try {
            if (inputData != null) inputData.close();
        } catch (IOException ex) {
            System.out.println(ex);
        }
        output = null;
        reader = null;
        parser = null;
        inputData = null;
    }

    /**
     * Writes the declaration of the "pos" feature: one {@code <value>} element
     * per distinct annotation collected in {@link #lespos}, encoded in UTF-8.
     *
     * @param f the file to write
     */
    public void getFeature(File f)
    {
        // withWriter closes the writer even when a write fails.
        f.withWriter("UTF-8") { Writer writer ->
            writer.write("<feature name=\"pos\" domain=\"T\">\n");
            for (String pos : lespos) {
                writer.write("<value name=\"" + escapeXml(pos, true) + "\"></value>\n");
            }
            writer.write("</feature>\n");
        }
    }

    /**
     * The main method.
     *
     * @param args the arguments
     */
    public static void main(String[] args) {

        // "~" is a shell shortcut that the JVM does not expand: resolve the
        // home directory explicitly.
        String rootDir = System.getProperty("user.home") + "/xml/beroul/";
        new File(rootDir + "/identity/").mkdir();

        // the tags who you want them to stay milestones
        ArrayList<String> milestones = new ArrayList<String>();
        milestones.add("tagUsage");
        milestones.add("pb");
        milestones.add("lb");
        milestones.add("catRef");

        File srcfile = new File(rootDir, "beroul.xml");
        File annotationsfiles = new File(rootDir, "result.tt");
        File resultfile = new File(rootDir, "beroul-result.xml");
        println("identity file : " + srcfile + " to : " + resultfile);

        // File.toURL() is deprecated and does not escape special characters.
        def builder = new InjectAnnotations(srcfile.toURI().toURL(), annotationsfiles,
                milestones);
        builder.process(resultfile);

        return;
    }

}