Révision 1204

tmp/org.txm.treetagger.core/src/org/txm/importer/xmltxm/BuildTTSrc.groovy (revision 1204)
2 2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 3
// Lyon 2, University of Franche-Comté, University of Nice
4 4
// Sophia Antipolis, University of Paris 3.
5
// 
5
//
6 6
// The TXM platform is free software: you can redistribute it
7 7
// and/or modify it under the terms of the GNU General Public
8 8
// License as published by the Free Software Foundation,
9 9
// either version 2 of the License, or (at your option) any
10 10
// later version.
11
// 
11
//
12 12
// The TXM platform is distributed in the hope that it will be
13 13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 15
// PURPOSE. See the GNU General Public License for more
16 16
// details.
17
// 
17
//
18 18
// You should have received a copy of the GNU General
19 19
// Public License along with the TXM platform. If not, see
20 20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
21
//
22
//
23
//
24 24
// $LastChangedDate: 2017-01-24 18:11:42 +0100 (mar. 24 janv. 2017) $
25 25
// $LastChangedRevision: 3400 $
26
// $LastChangedBy: mdecorde $ 
26
// $LastChangedBy: mdecorde $
27 27
//
28 28
package org.txm.importer.xmltxm
29 29

  
......
46 46
 */
47 47

  
48 48
public class BuildTTSrc {
49
	
49

  
50 50
	/** The url. */
51 51
	private def url;
52
	
52

  
53 53
	/** The input data. */
54 54
	private def inputData;
55
	
55

  
56 56
	/** The factory. */
57 57
	private def factory;
58
	
58

  
59 59
	/** The parser. */
60 60
	private XMLStreamReader parser;
61
	
61

  
62 62
	/** The output. */
63 63
	private BufferedWriter output;
64
	
64

  
65 65
	/**
66 66
	 * Instantiates a new builds the tt src.
67 67
	 * uses XML-TXM V2
......
74 74
			inputData = url.openStream();
75 75
			factory = XMLInputFactory.newInstance();
76 76
			parser = factory.createXMLStreamReader(inputData);
77
			
77

  
78 78
		} catch (XMLStreamException ex) {
79 79
			System.out.println(ex);
80 80
		} catch (IOException ex) {
81 81
			System.out.println("IOException while parsing ");
82 82
		}
83 83
	}
84
	
84

  
85 85
	/**
86 86
	 * Creates the output.
87 87
	 *
......
99 99
			return false;
100 100
		}
101 101
	}
102
	
102

  
103 103
	/**
104 104
	 * Process.
105 105
	 *
106 106
	 * @param outfile the outfile
107
     * @param formtype, if multiple form, use this param to choose the correct one, if null takes the first form found
107
	 * @param formtype, if multiple form, use this param to choose the correct one, if null takes the first form found
108 108
	 * @return true, if successful
109 109
	 */
110 110
	public boolean process(File outfile, String formtype) {
111 111
		if (!createOutput(outfile))
112 112
			return false;
113
		
113

  
114
		boolean inW = false
114 115
		boolean flagform = false; // to catch the content of the form tag
115 116
		boolean firstform = false; // to know if its the first form of the w element
116 117
		String form = ""; // the content of the form tag
......
122 123
				switch (event) {
123 124
					case XMLStreamConstants.START_ELEMENT:
124 125
						localname = parser.getLocalName();
126
						
125 127
						switch (localname) {
126 128
							case "w":
127
								//firstform = true;
129
							//firstform = true;
130
								inW = true
128 131
								break;
129 132
							case "form":
130
//								if (firstform) {
131
//									if (formtype != null) {
132
//										if(parser.getAttributeCount() > 0 
133
//											&& parser.getAttributeValue(0).equals(formtype)) // only one attribute in form, type
134
//											flagform = true;
135
//									}
136
//									else
133
								if (inW) {
134
									//								if (firstform) {
135
									//									if (formtype != null) {
136
									//										if(parser.getAttributeCount() > 0
137
									//											&& parser.getAttributeValue(0).equals(formtype)) // only one attribute in form, type
138
									//											flagform = true;
139
									//									}
140
									//									else
137 141
									flagform = true;
138 142
									form = "";
139 143
									firstform = false;
140
								//}
144
									//}
145
								}
141 146
								break;
142 147
							case "s": // TreeTagger can use s tags
143 148
								buffer.append("<s>\n");
......
147 152
					case XMLStreamConstants.END_ELEMENT:
148 153
						localname = parser.getLocalName();
149 154
						switch (localname) {
155
							case "w":
156
								inW = false
157
								break
150 158
							case "form":
151
								flagform = false;
152
								form = form.trim()
153
								if (form.length() == 0) buffer.append("__EMPTY__\n");
154
								else buffer.append(form.replace("\n", "").replace("<", "&lt;")+ "\n");
155
								//buffer.append(form+ "\n"); // its a txt file no need to use entities
159
								if (inW) { // ensure to process a form inside a w
160
									flagform = false;
161
									form = form.trim()
162
									if (form.length() == 0) buffer.append("__EMPTY__\n");
163
									else buffer.append(form.replace("\n", "").replace("<", "&lt;")+ "\n");
164
									//buffer.append(form+ "\n"); // its a txt file no need to use entities
165
								}
156 166
								break;
157
							
167

  
158 168
							case "s":
159 169
								buffer.append("</s>\n");
160 170
								break;
161 171
						}
162 172
						break;
163
					
173

  
164 174
					case XMLStreamConstants.CHARACTERS:
165 175
						if (flagform) {
166 176
							if (parser.getText().length() > 0)
......
181 191
		} catch (Exception ex) {
182 192
			System.out.println(ex);
183 193
			return false;
184
		} 
185
		
194
		}
195

  
186 196
		return true;
187 197
	}
188
	
198

  
189 199
	/**
190 200
	 * The main method.
191 201
	 *
192 202
	 * @param args the arguments
193 203
	 */
194 204
	public static void main(String[] args) {
195
		
205

  
196 206
		String rootDir = "~/xml/rgaqcj/";
197 207
		// new File(rootDir+"/identity/").mkdir();
198
		
208

  
199 209
		ArrayList<String> milestones = new ArrayList<String>();// the tags who
200 210
		// you want them
201 211
		// to stay
......
204 214
		milestones.add("pb");
205 215
		milestones.add("lb");
206 216
		milestones.add("catRef");
207
		
217

  
208 218
		File srcfile = new File(rootDir + "anainline/", "roland-p5.xml");
209 219
		File resultfile = new File(rootDir + "ttsrc/", "roland-p5.tt");
210 220
		println("build ttsrc file : " + srcfile + " to : " + resultfile);
211
		
221

  
212 222
		def builder = new BuildTTSrc(srcfile.toURL(), milestones);
213 223
		builder.process(resultfile);
214
		
224

  
215 225
		return;
216 226
	}
217
	
227

  
218 228
}

Formats disponibles : Unified diff