34 |
34 |
import visuAnalec.vue.Vue;
|
35 |
35 |
|
36 |
36 |
public class URSAnnotationsImporter {
|
37 |
|
|
|
37 |
|
38 |
38 |
protected MainCorpus mainCorpus;
|
|
39 |
|
39 |
40 |
protected Corpus analecCorpus;
|
|
41 |
|
40 |
42 |
protected File annotationsDirectory;
|
|
43 |
|
41 |
44 |
protected Vue analecVue;
|
|
45 |
|
42 |
46 |
protected IProgressMonitor monitor;
|
|
47 |
|
43 |
48 |
protected XMLInputFactory factory;
|
|
49 |
|
44 |
50 |
protected File aamFile;
|
|
51 |
|
45 |
52 |
protected static PersonalNamespaceContext Nscontext = new PersonalNamespaceContext();
|
46 |
|
|
|
53 |
|
47 |
54 |
public URSAnnotationsImporter(IProgressMonitor monitor, File outputDirectory, File aamFile, MainCorpus mainCorpus, Corpus analecCorpus, Vue analecVue) {
|
48 |
55 |
this.annotationsDirectory = outputDirectory;
|
49 |
56 |
this.aamFile = aamFile;
|
... | ... | |
52 |
59 |
this.analecVue = analecVue;
|
53 |
60 |
this.monitor = monitor;
|
54 |
61 |
}
|
55 |
|
|
|
62 |
|
56 |
63 |
public boolean process() throws Exception {
|
57 |
64 |
if (!annotationsDirectory.exists()) {
|
58 |
|
System.out.println("Error: annotationsDirectory does not exist: "+annotationsDirectory.getAbsolutePath());
|
|
65 |
System.out.println("Error: annotationsDirectory does not exist: " + annotationsDirectory.getAbsolutePath());
|
59 |
66 |
return false;
|
60 |
67 |
}
|
61 |
68 |
if (!annotationsDirectory.isDirectory()) {
|
62 |
|
System.out.println("Error: annotationsDirectory is not a directory: "+annotationsDirectory.getAbsolutePath());
|
|
69 |
System.out.println("Error: annotationsDirectory is not a directory: " + annotationsDirectory.getAbsolutePath());
|
63 |
70 |
return false;
|
64 |
71 |
}
|
65 |
72 |
if (!aamFile.exists()) {
|
66 |
|
System.out.println("Warning: aamFile does not exist: "+aamFile.getAbsolutePath());
|
67 |
|
} else {
|
68 |
|
|
|
73 |
System.out.println("Warning: aamFile does not exist: " + aamFile.getAbsolutePath());
|
|
74 |
}
|
|
75 |
else {
|
|
76 |
|
69 |
77 |
if (!FichiersGlozz.importerModeleGlozz(analecCorpus, aamFile)) {
|
70 |
|
System.out.println("Error while importing Glozz model: "+aamFile);
|
|
78 |
System.out.println("Error while importing Glozz model: " + aamFile);
|
71 |
79 |
return false;
|
72 |
80 |
}
|
73 |
81 |
}
|
74 |
82 |
File[] ursFiles = annotationsDirectory.listFiles(new FilenameFilter() {
|
|
83 |
|
75 |
84 |
@Override
|
76 |
85 |
public boolean accept(File dir, String name) {
|
77 |
86 |
return name.matches(".+-urs\\.xml");
|
78 |
87 |
}
|
79 |
88 |
});
|
80 |
|
|
|
89 |
|
81 |
90 |
if (ursFiles == null) {
|
82 |
|
System.out.println("No XML files found in "+annotationsDirectory);
|
|
91 |
System.out.println("No XML files found in " + annotationsDirectory);
|
83 |
92 |
return false;
|
84 |
93 |
}
|
85 |
94 |
if (ursFiles.length == 0) {
|
86 |
|
System.out.println("No XML files found in "+annotationsDirectory);
|
|
95 |
System.out.println("No XML files found in " + annotationsDirectory);
|
87 |
96 |
return false;
|
88 |
97 |
}
|
89 |
98 |
Arrays.sort(ursFiles);
|
90 |
|
|
|
99 |
|
91 |
100 |
factory = XMLInputFactory.newInstance();
|
92 |
101 |
List<String> cqpTextIds = Arrays.asList(mainCorpus.getCorpusTextIdsList());
|
93 |
|
|
94 |
|
List<Integer> all_result_summary = Arrays.asList(0,0,0,0,0,0,0,0);
|
95 |
|
|
96 |
|
if (monitor != null) monitor.subTask("Processing "+ursFiles.length+" texts...");
|
|
102 |
|
|
103 |
List<Integer> all_result_summary = Arrays.asList(0, 0, 0, 0, 0, 0, 0, 0);
|
|
104 |
|
|
105 |
if (monitor != null) monitor.subTask("Processing " + ursFiles.length + " texts...");
|
97 |
106 |
for (File xmlTXMFile : ursFiles) {
|
98 |
107 |
String textid = xmlTXMFile.getName().substring(0, xmlTXMFile.getName().length() - 8);
|
99 |
|
System.out.println("Processing text: "+textid);
|
|
108 |
System.out.println("Processing text: " + textid);
|
100 |
109 |
if (cqpTextIds.contains(textid)) {
|
101 |
|
if (monitor != null) monitor.subTask("Processing "+textid+" text...");
|
|
110 |
if (monitor != null) monitor.subTask("Processing " + textid + " text...");
|
102 |
111 |
// N unit, N unit error, N unit no match error, N unit too much match error, N Relation, N Relation error, N Schema, N Schema error
|
103 |
112 |
List<Integer> result_summary = processText(textid, xmlTXMFile);
|
104 |
|
|
105 |
|
System.out.println(textid+" import report: ");
|
106 |
|
System.out.println(" N Units added: "+result_summary.get(0));
|
107 |
|
System.out.println(" N Units error: "+result_summary.get(1));
|
108 |
|
System.out.println(" N Units no match error: "+result_summary.get(2));
|
109 |
|
System.out.println(" N Units too much match error: "+result_summary.get(3));
|
110 |
|
System.out.println(" N Relations added: "+result_summary.get(4));
|
111 |
|
System.out.println(" N Relations error: "+result_summary.get(5));
|
112 |
|
System.out.println(" N Schemas added: "+result_summary.get(6));
|
113 |
|
System.out.println(" N Schemas error: "+result_summary.get(7));
|
114 |
|
|
115 |
|
for (int i = 0 ; i < all_result_summary.size() ; i++) all_result_summary.set(i, all_result_summary.get(i)+result_summary.get(i));
|
116 |
|
|
117 |
|
} else {
|
118 |
|
System.out.println("Warning: cannot found text with ID="+textid+" in current CQP corpus.");
|
|
113 |
|
|
114 |
System.out.println(textid + " import report: ");
|
|
115 |
System.out.println(" N Units added: " + result_summary.get(0));
|
|
116 |
System.out.println(" N Units error: " + result_summary.get(1));
|
|
117 |
System.out.println(" N Units no match error: " + result_summary.get(2));
|
|
118 |
System.out.println(" N Units too much match error: " + result_summary.get(3));
|
|
119 |
System.out.println(" N Relations added: " + result_summary.get(4));
|
|
120 |
System.out.println(" N Relations error: " + result_summary.get(5));
|
|
121 |
System.out.println(" N Schemas added: " + result_summary.get(6));
|
|
122 |
System.out.println(" N Schemas error: " + result_summary.get(7));
|
|
123 |
|
|
124 |
for (int i = 0; i < all_result_summary.size(); i++)
|
|
125 |
all_result_summary.set(i, all_result_summary.get(i) + result_summary.get(i));
|
|
126 |
|
119 |
127 |
}
|
120 |
|
|
|
128 |
else {
|
|
129 |
System.out.println("Warning: cannot found text with ID=" + textid + " in current CQP corpus.");
|
|
130 |
}
|
|
131 |
|
121 |
132 |
if (monitor != null && monitor.isCanceled()) {
|
122 |
133 |
return false;
|
123 |
134 |
}
|
124 |
135 |
}
|
125 |
|
|
|
136 |
|
126 |
137 |
if (!aamFile.exists()) {
|
127 |
138 |
Vue vue = URSCorpora.getVue(analecCorpus);
|
128 |
139 |
vue.retablirVueParDefaut();
|
129 |
140 |
}
|
130 |
|
|
|
141 |
|
131 |
142 |
System.out.println("Final import report: ");
|
132 |
|
System.out.println(" N Units added: "+all_result_summary.get(0));
|
133 |
|
System.out.println(" N Units error: "+all_result_summary.get(1));
|
134 |
|
System.out.println(" N Units no match error: "+all_result_summary.get(2));
|
135 |
|
System.out.println(" N Units too much match error: "+all_result_summary.get(3));
|
136 |
|
System.out.println(" N Relations added: "+all_result_summary.get(4));
|
137 |
|
System.out.println(" N Relations error: "+all_result_summary.get(5));
|
138 |
|
System.out.println(" N Schemas added: "+all_result_summary.get(6));
|
139 |
|
System.out.println(" N Schemas error: "+all_result_summary.get(7));
|
140 |
|
|
|
143 |
System.out.println(" N Units added: " + all_result_summary.get(0));
|
|
144 |
System.out.println(" N Units error: " + all_result_summary.get(1));
|
|
145 |
System.out.println(" N Units no match error: " + all_result_summary.get(2));
|
|
146 |
System.out.println(" N Units too much match error: " + all_result_summary.get(3));
|
|
147 |
System.out.println(" N Relations added: " + all_result_summary.get(4));
|
|
148 |
System.out.println(" N Relations error: " + all_result_summary.get(5));
|
|
149 |
System.out.println(" N Schemas added: " + all_result_summary.get(6));
|
|
150 |
System.out.println(" N Schemas error: " + all_result_summary.get(7));
|
|
151 |
|
141 |
152 |
return true;
|
142 |
153 |
}
|
143 |
|
|
|
154 |
|
144 |
155 |
private int getPosition(Subcorpus textSubcorpus, HashMap<String, int[]> id2position, String id) {
|
145 |
156 |
int[] positions = id2position.get(id);
|
146 |
157 |
int start = textSubcorpus.getMatches().get(0).getStart();
|
147 |
158 |
int end = textSubcorpus.getMatches().get(0).getEnd();
|
148 |
159 |
if (positions.length == 0) { // no word for id=deb
|
149 |
160 |
return -1;
|
150 |
|
}
|
151 |
|
|
|
161 |
}
|
|
162 |
|
152 |
163 |
for (int p : positions) {
|
153 |
164 |
if (start <= p && p <= end) {
|
154 |
165 |
return p;
|
155 |
166 |
}
|
156 |
167 |
}
|
157 |
|
|
|
168 |
|
158 |
169 |
return -2;
|
159 |
170 |
}
|
160 |
|
|
|
171 |
|
161 |
172 |
private List<Integer> processText(String textid, File xmlTXMFile) throws Exception {
|
162 |
173 |
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
|
163 |
174 |
// N unit, N unit error, N unit no match error, N unit too much match error, N Relation, N Relation error, N Schema, N Schema error
|
... | ... | |
169 |
180 |
int nRelationsError = 0;
|
170 |
181 |
int nSchemaAdded = 0;
|
171 |
182 |
int nSchemaError = 0;
|
172 |
|
|
|
183 |
|
173 |
184 |
unites.clear();
|
174 |
185 |
relations.clear();
|
175 |
186 |
schemas.clear();
|
176 |
187 |
elementProperties.clear();
|
177 |
|
|
|
188 |
|
178 |
189 |
if (!parseXMLTXMFile(xmlTXMFile)) return null; // fill unites, relations, schemas and elementProperties
|
179 |
|
|
180 |
|
// if (unites.size() > 0) System.out.println(unites);
|
181 |
|
// if (relations.size() > 0) System.out.println(relations);
|
182 |
|
// if (schemas.size() > 0) System.out.println(schemas);
|
183 |
|
// if (elementProperties.size() > 0) System.out.println(elementProperties);
|
184 |
|
// for (String u : unites.keySet()) if (!elementProperties.containsKey(u+"-fs")) System.out.println("MISSIGN ELEM PROPERTIES: "+u);
|
185 |
|
// for (String u : relations.keySet()) if (!elementProperties.containsKey(u+"-fs")) System.out.println("MISSIGN ELEM PROPERTIES: "+u);
|
186 |
|
// for (String u : schemas.keySet()) if (!elementProperties.containsKey(u+"-fs")) System.out.println("MISSIGN ELEM PROPERTIES: "+u);
|
187 |
|
|
188 |
|
|
189 |
|
CQLQuery textQuery = new CQLQuery("[_.text_id=\""+textid+"\"] expand to text");
|
|
190 |
|
|
191 |
// if (unites.size() > 0) System.out.println(unites);
|
|
192 |
// if (relations.size() > 0) System.out.println(relations);
|
|
193 |
// if (schemas.size() > 0) System.out.println(schemas);
|
|
194 |
// if (elementProperties.size() > 0) System.out.println(elementProperties);
|
|
195 |
// for (String u : unites.keySet()) if (!elementProperties.containsKey(u+"-fs")) System.out.println("MISSIGN ELEM PROPERTIES: "+u);
|
|
196 |
// for (String u : relations.keySet()) if (!elementProperties.containsKey(u+"-fs")) System.out.println("MISSIGN ELEM PROPERTIES: "+u);
|
|
197 |
// for (String u : schemas.keySet()) if (!elementProperties.containsKey(u+"-fs")) System.out.println("MISSIGN ELEM PROPERTIES: "+u);
|
|
198 |
|
|
199 |
CQLQuery textQuery = new CQLQuery("[_.text_id=\"" + textid + "\"] expand to text");
|
190 |
200 |
Subcorpus textSubCorpus = mainCorpus.createSubcorpus(textQuery, "TXTTMP");
|
191 |
201 |
textSubCorpus.compute();
|
192 |
|
|
193 |
|
HashSet<String> ids = new HashSet<String>();
|
|
202 |
|
|
203 |
HashSet<String> ids = new HashSet<>();
|
194 |
204 |
for (String id : unites.keySet()) {
|
195 |
205 |
ids.add(unites.get(id)[2]);
|
196 |
206 |
ids.add(unites.get(id)[3]);
|
197 |
207 |
}
|
198 |
208 |
String[] idsArray = ids.toArray(new String[ids.size()]);
|
199 |
209 |
int[] idsIds = CQI.str2Id(mainCorpus.getProperty("id").getQualifiedName(), idsArray);
|
200 |
|
|
201 |
|
HashMap<String, int[]> id2position = new HashMap<String, int[]>();
|
202 |
|
for (int i = 0 ; i < idsArray.length ; i++) {
|
|
210 |
|
|
211 |
HashMap<String, int[]> id2position = new HashMap<>();
|
|
212 |
for (int i = 0; i < idsArray.length; i++) {
|
203 |
213 |
int[] positions = CQI.id2Cpos(mainCorpus.getProperty("id").getQualifiedName(), idsIds[i]);
|
204 |
214 |
id2position.put(idsArray[i], positions);
|
205 |
215 |
}
|
206 |
|
|
|
216 |
|
207 |
217 |
Structure structure = analecCorpus.getStructure();
|
208 |
218 |
HashSet<String> unitesStructure = structure.getUnites();
|
209 |
219 |
HashSet<String> relationsStructure = structure.getRelations();
|
210 |
220 |
HashSet<String> schemasStructure = structure.getSchemas();
|
211 |
|
|
|
221 |
|
212 |
222 |
ConsoleProgressBar cpb = new ConsoleProgressBar(unites.size());
|
213 |
223 |
for (String id : unites.keySet()) {
|
214 |
224 |
cpb.tick();
|
... | ... | |
220 |
230 |
String deb = unites.get(id)[2];
|
221 |
231 |
String fin = unites.get(id)[3];
|
222 |
232 |
if (elementProperties.containsKey(ana)) {
|
223 |
|
|
|
233 |
|
224 |
234 |
int start = getPosition(textSubCorpus, id2position, deb);
|
225 |
235 |
int end = getPosition(textSubCorpus, id2position, fin);
|
226 |
|
|
|
236 |
|
227 |
237 |
if (start < 0) {
|
228 |
238 |
nUnitsError++;
|
229 |
239 |
if (start == -1) {
|
230 |
|
System.out.println("WARNING: no position found word id = "+deb);
|
|
240 |
System.out.println("WARNING: no position found word id = " + deb);
|
231 |
241 |
nUnitsNoMatchError++;
|
232 |
|
} else {
|
233 |
|
System.out.println("WARNING: too many positions found for word id = "+deb);
|
|
242 |
}
|
|
243 |
else {
|
|
244 |
System.out.println("WARNING: too many positions found for word id = " + deb);
|
234 |
245 |
nUnitsTooMuchMatchError++;
|
235 |
246 |
}
|
236 |
|
} else if (end < 0) {
|
|
247 |
}
|
|
248 |
else if (end < 0) {
|
237 |
249 |
nUnitsError++;
|
238 |
250 |
if (end == -1) {
|
239 |
|
System.out.println("WARNING: no position found word id = "+fin);
|
|
251 |
System.out.println("WARNING: no position found word id = " + fin);
|
240 |
252 |
nUnitsNoMatchError++;
|
241 |
|
} else {
|
242 |
|
System.out.println("WARNING: too many positions found for word id = "+fin);
|
|
253 |
}
|
|
254 |
else {
|
|
255 |
System.out.println("WARNING: too many positions found for word id = " + fin);
|
243 |
256 |
nUnitsTooMuchMatchError++;
|
244 |
257 |
}
|
245 |
|
} else { // OK
|
246 |
|
|
247 |
|
//System.out.println("create unite: "+type+" ["+deb+", "+fin+"]");
|
|
258 |
}
|
|
259 |
else { // OK
|
|
260 |
|
|
261 |
// System.out.println("create unite: "+type+" ["+deb+", "+fin+"]");
|
248 |
262 |
Unite unite = analecCorpus.addUniteSaisie(type, start, end);
|
249 |
263 |
HashMap<String, String> props = elementProperties.get(ana);
|
250 |
264 |
HashSet<String> nomsProps = structure.getNomsProps(Unite.class, type);
|
... | ... | |
257 |
271 |
unitesRef.put(id, unite);
|
258 |
272 |
nUnitsAdded++;
|
259 |
273 |
}
|
260 |
|
} else {
|
261 |
|
System.out.println("Warning no properties found for element id="+id+" and ana="+ana);
|
262 |
274 |
}
|
|
275 |
else {
|
|
276 |
System.out.println("Warning no properties found for element id=" + id + " and ana=" + ana);
|
|
277 |
}
|
263 |
278 |
}
|
264 |
279 |
cpb.done();
|
265 |
|
|
|
280 |
|
266 |
281 |
cpb = new ConsoleProgressBar(relations.size());
|
267 |
282 |
for (String id : relations.keySet()) {
|
268 |
283 |
cpb.tick();
|
... | ... | |
272 |
287 |
structure.ajouterType(Relation.class, type);
|
273 |
288 |
}
|
274 |
289 |
String target = relations.get(id)[2];
|
275 |
|
|
|
290 |
|
276 |
291 |
if (elementProperties.containsKey(ana)) {
|
277 |
|
|
|
292 |
|
278 |
293 |
String[] wordsref = target.split(" ");
|
279 |
294 |
String[] wordsid = new String[wordsref.length];
|
280 |
|
for (int i = 0 ; i < wordsref.length ; i++) wordsid[i] = wordsref[i].substring(1);
|
|
295 |
for (int i = 0; i < wordsref.length; i++)
|
|
296 |
wordsid[i] = wordsref[i].substring(1);
|
281 |
297 |
try {
|
282 |
298 |
Unite elt1 = unitesRef.get(wordsid[0]);
|
283 |
299 |
Unite elt2 = unitesRef.get(wordsid[1]);
|
284 |
300 |
if (elt1 != null && elt2 != null) {
|
285 |
|
//System.out.println("create relation: "+type+" ["+deb+", "+fin+"]");
|
286 |
|
|
|
301 |
// System.out.println("create relation: "+type+" ["+deb+", "+fin+"]");
|
|
302 |
|
287 |
303 |
Relation relation = analecCorpus.addRelationSaisie(type, elt1, elt2);
|
288 |
304 |
HashMap<String, String> props = elementProperties.get(ana);
|
289 |
305 |
HashSet<String> nomsProps = structure.getNomsProps(Relation.class, type);
|
... | ... | |
294 |
310 |
relation.putProp(prop, props.get(prop));
|
295 |
311 |
}
|
296 |
312 |
nRelationsAdded++;
|
297 |
|
} else {
|
298 |
|
System.out.println("ERROR: relation element not found "+Arrays.toString(wordsref));
|
|
313 |
}
|
|
314 |
else {
|
|
315 |
System.out.println("ERROR: relation element not found " + Arrays.toString(wordsref));
|
299 |
316 |
nRelationsError++;
|
300 |
317 |
}
|
301 |
|
} catch(Exception e) {
|
302 |
|
|
303 |
318 |
}
|
304 |
|
} else {
|
305 |
|
System.out.println("Warning no properties found for element id="+id);
|
|
319 |
catch (Exception e) {
|
|
320 |
|
|
321 |
}
|
306 |
322 |
}
|
|
323 |
else {
|
|
324 |
System.out.println("Warning no properties found for element id=" + id);
|
|
325 |
}
|
307 |
326 |
}
|
308 |
327 |
cpb.done();
|
309 |
|
|
|
328 |
|
310 |
329 |
cpb = new ConsoleProgressBar(schemas.size());
|
311 |
330 |
for (String id : schemas.keySet()) {
|
312 |
331 |
cpb.tick();
|
... | ... | |
319 |
338 |
if (elementProperties.containsKey(ana)) {
|
320 |
339 |
String[] unitsref = target.split(" ");
|
321 |
340 |
try {
|
322 |
|
//System.out.println("create relation: "+type+" ["+deb+", "+fin+"]");
|
|
341 |
// System.out.println("create relation: "+type+" ["+deb+", "+fin+"]");
|
323 |
342 |
Schema schema = new Schema(type);
|
324 |
343 |
for (String unitid : unitsref) {
|
325 |
344 |
unitid = unitid.substring(1); // remove '#'
|
326 |
345 |
if (unitesRef.containsKey(unitid)) {
|
327 |
346 |
Unite unite = unitesRef.get(unitid);
|
328 |
347 |
schema.ajouter(unite);
|
329 |
|
} else {
|
330 |
|
System.out.println("Warning: missing unit id: "+unitid);
|
|
348 |
}
|
|
349 |
else {
|
|
350 |
System.out.println("Warning: missing unit id: " + unitid);
|
331 |
351 |
nSchemaError++;
|
332 |
352 |
}
|
333 |
353 |
}
|
... | ... | |
341 |
361 |
schema.putProp(prop, props.get(prop));
|
342 |
362 |
}
|
343 |
363 |
nSchemaAdded++;
|
344 |
|
} catch(Exception e) {
|
345 |
|
System.out.println("Error while creating schema with id="+id);
|
346 |
364 |
}
|
347 |
|
} else {
|
348 |
|
System.out.println("Warning no properties found for element id="+id);
|
|
365 |
catch (Exception e) {
|
|
366 |
System.out.println("Error while creating schema with id=" + id);
|
|
367 |
}
|
349 |
368 |
}
|
|
369 |
else {
|
|
370 |
System.out.println("Warning no properties found for element id=" + id);
|
|
371 |
}
|
350 |
372 |
}
|
351 |
373 |
cpb.done();
|
352 |
|
|
|
374 |
|
353 |
375 |
textSubCorpus.delete();
|
354 |
376 |
return Arrays.asList(nUnitsAdded, nUnitsError, nUnitsNoMatchError, nUnitsTooMuchMatchError,
|
355 |
377 |
nRelationsAdded, nRelationsError,
|
356 |
378 |
nSchemaAdded, nSchemaError);
|
357 |
379 |
}
|
358 |
|
|
|
380 |
|
359 |
381 |
private boolean parseXMLTXMFile(File xmlTXMFile) throws XMLStreamException, MalformedURLException, IOException {
|
360 |
382 |
InputStream inputData = xmlTXMFile.toURI().toURL().openStream();
|
361 |
383 |
factory = XMLInputFactory.newInstance();
|
362 |
384 |
XMLStreamReader parser = factory.createXMLStreamReader(inputData);
|
363 |
|
|
|
385 |
|
364 |
386 |
String currentType = null;
|
365 |
387 |
String currentN = null;
|
366 |
388 |
String currentAna = null;
|
367 |
389 |
String currentName = null;
|
368 |
390 |
String currentPropValue = null;
|
369 |
|
|
|
391 |
|
370 |
392 |
int processMode = 0; // 0 nothing, 1 elements 2 properties
|
371 |
|
|
|
393 |
|
372 |
394 |
if (!goToStandOff(parser)) {
|
373 |
|
System.out.println("Error: cannot find the 'standOff' element in "+xmlTXMFile);
|
|
395 |
System.out.println("Error: cannot find the 'standOff' element in " + xmlTXMFile);
|
374 |
396 |
return false;
|
375 |
397 |
}
|
376 |
398 |
String localname = null;
|
377 |
399 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
|
378 |
400 |
switch (event) {
|
379 |
|
case XMLStreamConstants.START_ELEMENT:
|
380 |
|
localname = parser.getLocalName();
|
381 |
|
|
382 |
|
if (localname.equals("annotationGrp")) {
|
383 |
|
currentType = parser.getAttributeValue(null, "type");
|
384 |
|
currentN = parser.getAttributeValue(null, "subtype");
|
385 |
|
if (currentType != null && currentN != null) {
|
386 |
|
processMode = 1;
|
387 |
|
} else {
|
388 |
|
System.out.println("Warning found "+localname+" without 'type' and 'n' attribute");
|
|
401 |
case XMLStreamConstants.START_ELEMENT:
|
|
402 |
localname = parser.getLocalName();
|
|
403 |
|
|
404 |
if (localname.equals("annotationGrp")) {
|
|
405 |
currentType = parser.getAttributeValue(null, "type");
|
|
406 |
currentN = parser.getAttributeValue(null, "subtype");
|
|
407 |
if (currentType != null && currentN != null) {
|
|
408 |
processMode = 1;
|
|
409 |
}
|
|
410 |
else {
|
|
411 |
System.out.println("Warning found " + localname + " without 'type' and 'n' attribute");
|
|
412 |
processMode = 0;
|
|
413 |
}
|
|
414 |
}
|
|
415 |
else if (localname.equals("div")) {
|
|
416 |
String type = parser.getAttributeValue(null, "type");
|
|
417 |
if (type == null) type = "";
|
|
418 |
if (type.endsWith("-fs"))
|
|
419 |
processMode = 2;
|
|
420 |
}
|
|
421 |
|
|
422 |
if (processMode == 1) {
|
|
423 |
if (localname.equals("span") || localname.equals("link")) {
|
|
424 |
String id = parser.getAttributeValue(null, "id");
|
|
425 |
String ana = parser.getAttributeValue(null, "ana");
|
|
426 |
// System.out.println("SPAN id="+id +" ana="+ana+" currentType="+currentType+" currentN="+currentN);
|
|
427 |
if ("Unit".equals(currentType)) {
|
|
428 |
String from = parser.getAttributeValue(null, "from");
|
|
429 |
String to = parser.getAttributeValue(null, "to");
|
|
430 |
registerUnite(id, ana, currentN, from, to);
|
|
431 |
}
|
|
432 |
else if ("Relation".equals(currentType)) {
|
|
433 |
String target = parser.getAttributeValue(null, "target");
|
|
434 |
registerRelation(id, ana, currentN, target);
|
|
435 |
}
|
|
436 |
else if ("Schema".equals(currentType)) {
|
|
437 |
String target = parser.getAttributeValue(null, "target");
|
|
438 |
registerSchema(id, ana, currentN, target);
|
|
439 |
}
|
|
440 |
}
|
|
441 |
}
|
|
442 |
else if (processMode == 2) {
|
|
443 |
if (localname.equals("fs")) {
|
|
444 |
currentAna = parser.getAttributeValue(null, "id");
|
|
445 |
if (elementProperties.get(currentAna) != null) System.out.println("WARNING: duplicated element properties: " + currentAna);
|
|
446 |
elementProperties.put(currentAna, new HashMap<String, String>());
|
|
447 |
}
|
|
448 |
else if (localname.equals("f")) {
|
|
449 |
currentName = parser.getAttributeValue(null, "name");
|
|
450 |
currentPropValue = "";
|
|
451 |
}
|
|
452 |
}
|
|
453 |
else {
|
|
454 |
// nothing to do
|
|
455 |
}
|
|
456 |
|
|
457 |
break;
|
|
458 |
case XMLStreamConstants.CHARACTERS:
|
|
459 |
if (processMode == 2 && currentAna != null && currentName != null) {
|
|
460 |
currentPropValue += parser.getText();
|
|
461 |
}
|
|
462 |
break;
|
|
463 |
case XMLStreamConstants.END_ELEMENT:
|
|
464 |
localname = parser.getLocalName();
|
|
465 |
if (localname.equals("standOff")) { // stop all
|
|
466 |
parser.close();
|
|
467 |
inputData.close();
|
|
468 |
return true;
|
|
469 |
}
|
|
470 |
else if (processMode == 1 && localname.equals("annotationGrp")) { // stop all
|
|
471 |
currentType = null;
|
|
472 |
currentN = null;
|
389 |
473 |
processMode = 0;
|
390 |
474 |
}
|
391 |
|
} else if (localname.equals("div")) {
|
392 |
|
String type = parser.getAttributeValue(null, "type");
|
393 |
|
if (type == null) type = "";
|
394 |
|
if (type.endsWith("-fs"))
|
395 |
|
processMode = 2;
|
396 |
|
}
|
397 |
|
|
398 |
|
if (processMode == 1) {
|
399 |
|
if (localname.equals("span") || localname.equals("link")) {
|
400 |
|
String id = parser.getAttributeValue(null, "id");
|
401 |
|
String ana = parser.getAttributeValue(null, "ana");
|
402 |
|
//System.out.println("SPAN id="+id +" ana="+ana+" currentType="+currentType+" currentN="+currentN);
|
403 |
|
if ("Unit".equals(currentType)) {
|
404 |
|
String from = parser.getAttributeValue(null, "from");
|
405 |
|
String to = parser.getAttributeValue(null, "to");
|
406 |
|
registerUnite(id, ana, currentN, from, to);
|
407 |
|
} else if ("Relation".equals(currentType)) {
|
408 |
|
String target = parser.getAttributeValue(null, "target");
|
409 |
|
registerRelation(id, ana, currentN, target);
|
410 |
|
} else if ("Schema".equals(currentType)) {
|
411 |
|
String target = parser.getAttributeValue(null, "target");
|
412 |
|
registerSchema(id, ana, currentN, target);
|
|
475 |
else if (processMode == 2 && localname.equals("div")) {
|
|
476 |
processMode = 0;
|
|
477 |
}
|
|
478 |
|
|
479 |
if (processMode == 1) {
|
|
480 |
|
|
481 |
}
|
|
482 |
else if (processMode == 2) {
|
|
483 |
if (localname.equals("fs")) {
|
|
484 |
currentAna = null;
|
413 |
485 |
}
|
|
486 |
else if (localname.equals("f")) {
|
|
487 |
if (currentName != null && currentAna != null) {
|
|
488 |
elementProperties.get(currentAna).put(currentName, currentPropValue);
|
|
489 |
}
|
|
490 |
currentName = null;
|
|
491 |
}
|
414 |
492 |
}
|
415 |
|
} else if (processMode == 2) {
|
416 |
|
if (localname.equals("fs")) {
|
417 |
|
currentAna = parser.getAttributeValue(null, "id");
|
418 |
|
if (elementProperties.get(currentAna) != null) System.out.println("WARNING: duplicated element properties: "+currentAna);
|
419 |
|
elementProperties.put(currentAna, new HashMap<String, String>());
|
420 |
|
} else if (localname.equals("f")) {
|
421 |
|
currentName= parser.getAttributeValue(null, "name");
|
422 |
|
currentPropValue = "";
|
|
493 |
else {
|
|
494 |
// nothing to do
|
423 |
495 |
}
|
424 |
|
} else {
|
425 |
|
// nothing to do
|
426 |
|
}
|
427 |
|
|
428 |
|
break;
|
429 |
|
case XMLStreamConstants.CHARACTERS:
|
430 |
|
if (processMode == 2 && currentAna != null && currentName != null) {
|
431 |
|
currentPropValue += parser.getText();
|
432 |
|
}
|
433 |
|
break;
|
434 |
|
case XMLStreamConstants.END_ELEMENT:
|
435 |
|
localname = parser.getLocalName();
|
436 |
|
if (localname.equals("standOff")) { // stop all
|
|
496 |
|
|
497 |
break;
|
|
498 |
|
|
499 |
case XMLStreamConstants.END_DOCUMENT:
|
437 |
500 |
parser.close();
|
438 |
501 |
inputData.close();
|
439 |
|
return true;
|
440 |
|
} else if (processMode == 1 && localname.equals("annotationGrp")) { // stop all
|
441 |
|
currentType = null;
|
442 |
|
currentN = null;
|
443 |
|
processMode = 0;
|
444 |
|
} else if (processMode == 2 && localname.equals("div")) {
|
445 |
|
processMode = 0;
|
446 |
|
}
|
447 |
|
|
448 |
|
if (processMode == 1) {
|
449 |
|
|
450 |
|
} else if (processMode == 2) {
|
451 |
|
if (localname.equals("fs")) {
|
452 |
|
currentAna = null;
|
453 |
|
} else if (localname.equals("f")) {
|
454 |
|
if (currentName != null && currentAna != null) {
|
455 |
|
elementProperties.get(currentAna).put(currentName, currentPropValue);
|
456 |
|
}
|
457 |
|
currentName = null;
|
458 |
|
}
|
459 |
|
} else {
|
460 |
|
// nothing to do
|
461 |
|
}
|
462 |
|
|
463 |
|
break;
|
464 |
|
|
465 |
|
case XMLStreamConstants.END_DOCUMENT:
|
466 |
|
parser.close();
|
467 |
|
inputData.close();
|
468 |
|
return false;
|
|
502 |
return false;
|
469 |
503 |
}
|
470 |
504 |
}
|
471 |
505 |
parser.close();
|
472 |
506 |
inputData.close();
|
473 |
|
|
|
507 |
|
474 |
508 |
return false; // standOff not found
|
475 |
509 |
}
|
476 |
|
|
477 |
|
HashMap<String, HashMap<String, String>> elementProperties = new HashMap<String, HashMap<String, String>>();
|
478 |
|
HashMap<String, String[]> unites = new HashMap<String, String[]>(); // id -> {ana, type, from, to}
|
479 |
|
HashMap<String, Unite> unitesRef = new HashMap<String, Unite>(); // id -> Unite
|
|
510 |
|
|
511 |
HashMap<String, HashMap<String, String>> elementProperties = new HashMap<>();
|
|
512 |
|
|
513 |
HashMap<String, String[]> unites = new HashMap<>(); // id -> {ana, type, from, to}
|
|
514 |
|
|
515 |
HashMap<String, Unite> unitesRef = new HashMap<>(); // id -> Unite
|
|
516 |
|
480 |
517 |
private void registerUnite(String id, String ana, String type, String from, String to) {
|
481 |
|
String[] data = {ana.substring(1), type, from.substring(5), to.substring(5)};
|
|
518 |
String[] data = { ana.substring(1), type, from.substring(5), to.substring(5) };
|
482 |
519 |
if (unites.containsKey(id)) {
|
483 |
|
System.out.println("Warning: duplicated Unite id: "+id);
|
484 |
|
} else {
|
|
520 |
System.out.println("Warning: duplicated Unite id: " + id);
|
|
521 |
}
|
|
522 |
else {
|
485 |
523 |
unites.put(id, data);
|
486 |
524 |
}
|
487 |
525 |
}
|
488 |
|
HashMap<String, String[]> relations = new HashMap<String, String[]>(); // id -> {ana, type, target}
|
|
526 |
|
|
527 |
HashMap<String, String[]> relations = new HashMap<>(); // id -> {ana, type, target}
|
|
528 |
|
489 |
529 |
private void registerRelation(String id, String ana, String type, String target) {
|
490 |
|
String[] data = {ana.substring(1), type, target};
|
|
530 |
String[] data = { ana.substring(1), type, target };
|
491 |
531 |
if (relations.containsKey(id)) {
|
492 |
|
System.out.println("Warning: duplicated Relation id: "+id);
|
493 |
|
} else {
|
|
532 |
System.out.println("Warning: duplicated Relation id: " + id);
|
|
533 |
}
|
|
534 |
else {
|
494 |
535 |
relations.put(id, data);
|
495 |
536 |
}
|
496 |
537 |
}
|
497 |
|
HashMap<String, String[]> schemas = new HashMap<String, String[]>(); // id -> {ana, type, target}
|
|
538 |
|
|
539 |
HashMap<String, String[]> schemas = new HashMap<>(); // id -> {ana, type, target}
|
|
540 |
|
498 |
541 |
private void registerSchema(String id, String ana, String type, String target) {
|
499 |
|
String[] data = {ana.substring(1), type, target};
|
500 |
|
//System.out.println("R schema: "+id+" : "+Arrays.toString(data));
|
|
542 |
String[] data = { ana.substring(1), type, target };
|
|
543 |
// System.out.println("R schema: "+id+" : "+Arrays.toString(data));
|
501 |
544 |
if (schemas.containsKey(id)) {
|
502 |
|
System.out.println("Warning: duplicated Schema id: "+id);
|
503 |
|
} else {
|
|
545 |
System.out.println("Warning: duplicated Schema id: " + id);
|
|
546 |
}
|
|
547 |
else {
|
504 |
548 |
schemas.put(id, data);
|
505 |
549 |
}
|
506 |
550 |
}
|
507 |
|
|
|
551 |
|
508 |
552 |
private boolean goToStandOff(XMLStreamReader parser) throws XMLStreamException {
|
509 |
553 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) {
|
510 |
554 |
switch (event) {
|
511 |
|
case XMLStreamConstants.START_ELEMENT:
|
512 |
|
String localname = parser.getLocalName();
|
513 |
|
if (localname.equals("standOff")) return true;
|
514 |
|
break;
|
|
555 |
case XMLStreamConstants.START_ELEMENT:
|
|
556 |
String localname = parser.getLocalName();
|
|
557 |
if (localname.equals("standOff")) return true;
|
|
558 |
break;
|
515 |
559 |
}
|
516 |
560 |
}
|
517 |
561 |
return false;
|