Révision 458
tmp/org.txm.tigersearch.rcp/src/org/txm/function/tigersearch/TSIndex.java (revision 458) | ||
---|---|---|
1 |
package org.txm.function.tigersearch; |
|
2 |
|
|
3 |
import ims.tiger.corpus.NT_Node; |
|
4 |
import ims.tiger.corpus.Node; |
|
5 |
import ims.tiger.corpus.Sentence; |
|
6 |
import ims.tiger.corpus.T_Node; |
|
7 |
import ims.tiger.gui.tigergraphviewer.draw.Display_Sentence; |
|
8 |
import ims.tiger.gui.tigergraphviewer.forest.ResultForest; |
|
9 |
import ims.tiger.query.api.MatchResult; |
|
10 |
import ims.tiger.query.internalapi.InternalCorpusQueryManager; |
|
11 |
|
|
12 |
import java.io.File; |
|
13 |
import java.io.IOException; |
|
14 |
import java.util.ArrayList; |
|
15 |
import java.util.Arrays; |
|
16 |
import java.util.HashMap; |
|
17 |
import java.util.HashSet; |
|
18 |
import java.util.List; |
|
19 |
import java.util.regex.Pattern; |
|
20 |
|
|
21 |
import org.txm.Toolbox; |
|
22 |
import org.txm.index.core.functions.Index; |
|
23 |
import org.txm.index.core.functions.Line; |
|
24 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
25 |
import org.txm.searchengine.cqp.CQPEngine; |
|
26 |
import org.txm.searchengine.cqp.ICqiClient; |
|
27 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
28 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
|
29 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
30 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
|
31 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
32 |
import org.txm.searchengine.cqp.corpus.Property; |
|
33 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
|
34 |
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty; |
|
35 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
36 |
import org.txm.searchengine.cqp.corpus.query.MatchUtils; |
|
37 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
38 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
39 |
import org.txm.searchengine.ts.TSCorpus; |
|
40 |
import org.txm.searchengine.ts.TSCorpusManager; |
|
41 |
import org.txm.searchengine.ts.TSMatch; |
|
42 |
import org.txm.searchengine.ts.TSResult; |
|
43 |
import org.txm.utils.logger.Log; |
|
44 |
|
|
45 |
public class TSIndex extends Index { |
|
46 |
|
|
47 |
|
|
48 |
/**
 * Creates an index over a corpus using the query {@code "[]"}.
 *
 * @param corpus the corpus handed to the parent {@code Index} constructor
 * @param props the properties handed to the parent {@code Index} constructor
 * @throws IOException propagated from the parent constructor
 * @throws CqiClientException propagated from the parent constructor
 * @throws CqiServerError propagated from the parent constructor
 */
public TSIndex(Corpus corpus, List<Property> props)
		throws IOException, CqiClientException, CqiServerError {
	super(corpus, new Query("[]"), props);
}
|
52 |
|
|
53 |
/**
 * Creates an index over a corpus for the given query.
 * The query text is later sent to TIGERSearch (see getMatchesFromTSQuery).
 *
 * @param corpus the corpus handed to the parent {@code Index} constructor
 * @param query the query handed to the parent {@code Index} constructor
 * @param props the properties handed to the parent {@code Index} constructor
 * @throws IOException propagated from the parent constructor
 * @throws CqiClientException propagated from the parent constructor
 * @throws CqiServerError propagated from the parent constructor
 */
public TSIndex(Corpus corpus, Query query, List<Property> props)
		throws IOException, CqiClientException, CqiServerError {
	super(corpus, query, props);
}
|
57 |
|
|
58 |
/**
 * Creates an index over a partition for the given query.
 *
 * @param partition the partition handed to the parent {@code Index} constructor
 * @param query the query handed to the parent {@code Index} constructor
 * @param props the properties handed to the parent {@code Index} constructor
 * @throws IOException propagated from the parent constructor
 * @throws CqiClientException propagated from the parent constructor
 * @throws CqiServerError propagated from the parent constructor
 */
public TSIndex(Partition partition, Query query, List<Property> props)
		throws IOException, CqiClientException, CqiServerError {
	super(partition, query, props);
}
|
62 |
|
|
63 |
|
|
64 |
boolean isTIgerInitialized = false; |
|
65 |
private TSCorpus tscorpus; |
|
66 |
private TSResult tsresult; |
|
67 |
private boolean initTIGERSearch() { |
|
68 |
if (isTIgerInitialized) return isTIgerInitialized; |
|
69 |
|
|
70 |
String id = getCorpus().getMainCorpus().getName(); |
|
71 |
File configfile = new File(getCorpus().getBaseDirectory(),"tiger/tigersearch.logprop"); |
|
72 |
File registrydir = new File(getCorpus().getBaseDirectory(), "tiger"); |
|
73 |
|
|
74 |
TSCorpusManager manager = new TSCorpusManager(registrydir, configfile); |
|
75 |
if (manager.isInitialized()) { |
|
76 |
tscorpus = manager.getCorpus(id); |
|
77 |
if (tscorpus == null) { |
|
78 |
System.out.println("TIGERSearch corpus not found in "+registrydir); |
|
79 |
isTIgerInitialized = false; |
|
80 |
} |
|
81 |
|
|
82 |
isTIgerInitialized = true; |
|
83 |
} |
|
84 |
return isTIgerInitialized; |
|
85 |
} |
|
86 |
|
|
87 |
@Override |
|
88 |
/** |
|
89 |
* count tokens. |
|
90 |
* |
|
91 |
* @param corp the corp |
|
92 |
* @return true, if successful |
|
93 |
* @throws CqiClientException |
|
94 |
* @throws CqiServerError |
|
95 |
* @throws IOException |
|
96 |
*/ |
|
97 |
protected boolean scanCorpus(Corpus corp) throws CqiClientException, IOException, CqiServerError { |
|
98 |
|
|
99 |
if (!initTIGERSearch()) return false; |
|
100 |
|
|
101 |
// get the cqp result of the query |
|
102 |
// long time = System.currentTimeMillis(); |
|
103 |
// QueryResult result = corp.query(getQuery(), "index", true); //$NON-NLS-1$ |
|
104 |
boolean isTargetUsed = false;//Toolbox.getCqiClient().subCorpusHasField(result.getQualifiedCqpId(), ICqiClient.CQI_CONST_FIELD_TARGET); |
|
105 |
|
|
106 |
// System.out.println("nLines : "+nLines); |
|
107 |
List<Match> matches; |
|
108 |
try { |
|
109 |
matches = getMatchesFromTSQuery(corp); |
|
110 |
} catch (Exception e1) { |
|
111 |
System.out.println("Error while queing TIGERSearch: "+e1); |
|
112 |
e1.printStackTrace(); |
|
113 |
return false; |
|
114 |
} |
|
115 |
int nbresults = matches.size(); |
|
116 |
if (nbresults == 0) { |
|
117 |
return false; |
|
118 |
} |
|
119 |
this.nLines += nbresults; // get number of tokens |
|
120 |
|
|
121 |
// count matches |
|
122 |
// time = System.currentTimeMillis(); |
|
123 |
List<Integer> allpositions = new ArrayList<Integer>(); |
|
124 |
for (int j = 0; j < nbresults; j++) { |
|
125 |
Match match = matches.get(j); |
|
126 |
// beginingOfKeywordsPositions.add(match.getStart()); // get the |
|
127 |
// first index |
|
128 |
// lengthOfKeywords.add(match.size());// get the last index |
|
129 |
if (isTargetUsed) { |
|
130 |
allpositions.add(match.getTarget()); |
|
131 |
} else { |
|
132 |
for (int i = match.getStart(); i <= match.getEnd(); i++) |
|
133 |
allpositions.add(i); |
|
134 |
} |
|
135 |
} |
|
136 |
|
|
137 |
int[] allpositionsarray = new int[allpositions.size()]; |
|
138 |
int pcount = 0; |
|
139 |
for (int p : allpositions) |
|
140 |
allpositionsarray[pcount++] = p; |
|
141 |
|
|
142 |
// time = System.currentTimeMillis(); |
|
143 |
HashMap<Property, int[]> propsId = new HashMap<Property, int[]>(); |
|
144 |
for (Property property : props) { |
|
145 |
try { |
|
146 |
if (property instanceof StructuralUnitProperty) { |
|
147 |
int[] structs = CorpusManager.getCorpusManager() |
|
148 |
.getCqiClient().cpos2Struc( |
|
149 |
property.getQualifiedName(), |
|
150 |
allpositionsarray); |
|
151 |
propsId.put(property, structs); |
|
152 |
} else { |
|
153 |
int[] indices = CorpusManager.getCorpusManager() |
|
154 |
.getCqiClient().cpos2Id( |
|
155 |
property.getQualifiedName(), |
|
156 |
allpositionsarray); |
|
157 |
propsId.put(property, indices); |
|
158 |
} |
|
159 |
} catch (Exception e) { |
|
160 |
System.out.println("Error while properties projection: "+e); |
|
161 |
Log.printStackTrace(e); |
|
162 |
return false; |
|
163 |
} |
|
164 |
} |
|
165 |
// System.out.println("Time recup indices "+(System.currentTimeMillis()-time)); |
|
166 |
int currentIndex = 0; |
|
167 |
// time = System.currentTimeMillis(); |
|
168 |
for (int i = 0; i < nbresults; i++) { |
|
169 |
Line line = new Line(); |
|
170 |
Match match = matches.get(i); |
|
171 |
int size = match.size(); |
|
172 |
if (isTargetUsed) size = 1; |
|
173 |
for (int p = 0; p < props.size(); p++) { |
|
174 |
Property property = props.get(p); |
|
175 |
int[] allprosids = propsId.get(property); |
|
176 |
int[] ids = new int[size]; |
|
177 |
System.arraycopy(allprosids, currentIndex, ids, 0, size); |
|
178 |
line.putIds(property, ids); |
|
179 |
} |
|
180 |
currentIndex += size; |
|
181 |
|
|
182 |
String signature = line.getSignature(); |
|
183 |
|
|
184 |
if (counts.containsKey(signature)) // if the counts contains the |
|
185 |
// signature, increment its |
|
186 |
// corresponding value |
|
187 |
{ |
|
188 |
while (counts.get(signature).size() <= currentpartid) |
|
189 |
counts.get(signature).add(0); |
|
190 |
int c = counts.get(signature).get(currentpartid) + 1; |
|
191 |
counts.get(signature).set(currentpartid, c); |
|
192 |
} else // else initialize count of the signature to 1 |
|
193 |
{ |
|
194 |
// System.out.println("add new sign "+signature+" of line "+line.toString()); |
|
195 |
ArrayList<Integer> tmp = new ArrayList<Integer>(); |
|
196 |
for (int j = 0; j < currentpartid + 1; j++) |
|
197 |
tmp.add(0); |
|
198 |
counts.put(signature, tmp); |
|
199 |
counts.get(signature).set(currentpartid, 1); |
|
200 |
|
|
201 |
lines.add(line); |
|
202 |
} |
|
203 |
} |
|
204 |
// System.out.println("Time count lines "+(System.currentTimeMillis()-time)); |
|
205 |
// System.out.println("took "+(System.currentTimeMillis()-time)); |
|
206 |
return true; |
|
207 |
|
|
208 |
} |
|
209 |
|
|
210 |
private List<Match> getMatchesFromTSQuery(Corpus corp) throws Exception { |
|
211 |
ArrayList<Match> matches = new ArrayList<Match>(); |
|
212 |
tsresult = tscorpus.query(query.toString()); |
|
213 |
boolean debug = false; |
|
214 |
ResultForest forest = tsresult.getForest(); |
|
215 |
MatchResult result = tsresult.getMatchResult(); |
|
216 |
|
|
217 |
int var_ok = 0; |
|
218 |
String label = "@|pivot"; |
|
219 |
Pattern p = Pattern.compile(label); |
|
220 |
for (String var : result.getVariableNames()) { |
|
221 |
if (p.matcher(var).matches()) var_ok++; |
|
222 |
} |
|
223 |
if (var_ok == 0) { |
|
224 |
System.out.println("** Error in TSIndex: no label '@' or 'pivot' found in TIGERSearch query. Aborting."); |
|
225 |
return matches; |
|
226 |
} else if (var_ok > 1) { |
|
227 |
System.out.println("** Error in TSIndex: too many labels '@' or 'pivot' found in TIGERSearch query. Aborting."); |
|
228 |
return matches; |
|
229 |
} |
|
230 |
|
|
231 |
InternalCorpusQueryManager manager = tsresult.getManager(); |
|
232 |
// Display_Sentence match = null; |
|
233 |
// |
|
234 |
// if (forest.isNextMatch()) { |
|
235 |
// match = forest.nextMatch(); |
|
236 |
// } else { |
|
237 |
// match = null; |
|
238 |
// } |
|
239 |
|
|
240 |
HashSet<String> matchingTnodesID = new HashSet<String>(); |
|
241 |
String EDITIONID = "editionId"; |
|
242 |
|
|
243 |
int size = forest.getForestSize(); |
|
244 |
for ( int match_no = 0 ; match_no < size ; match_no++) { |
|
245 |
if (debug) System.out.println("Match "+match_no); |
|
246 |
int sentno = result.getSentenceNumberAt(match_no); |
|
247 |
Sentence sentence = manager.getSentence(sentno); |
|
248 |
|
|
249 |
result.orderSentenceSubmatches(sentno); |
|
250 |
int n = result.getSentenceSubmatchSize(sentno); |
|
251 |
|
|
252 |
for (int iSubMatch=0; iSubMatch<n; iSubMatch++) { |
|
253 |
if (debug) System.out.println(" Sub "+iSubMatch); |
|
254 |
int[] var_values = result.getSentenceSubmatchAt(sentno, iSubMatch); |
|
255 |
int value; |
|
256 |
for (int j=0; j<var_values.length; j++) { |
|
257 |
if (debug) System.out.println(" Var "+j+" name="+result.getVariableName(j)); |
|
258 |
value = var_values[j]; |
|
259 |
if (value>=0) { |
|
260 |
String var_name = result.getVariableName(j); |
|
261 |
Node referred = sentence.getNode(value); |
|
262 |
if (p.matcher(var_name).matches()) { |
|
263 |
if (referred instanceof T_Node) { |
|
264 |
if (debug) System.out.println(" Node: "+referred.getFeature(EDITIONID)); |
|
265 |
matchingTnodesID.add(referred.getFeature(EDITIONID)); |
|
266 |
} else if (referred instanceof NT_Node) { |
|
267 |
for (T_Node c : getTerminals((NT_Node) referred, sentence)) { |
|
268 |
matchingTnodesID.add(c.getFeature(EDITIONID)); |
|
269 |
} |
|
270 |
} |
|
271 |
} |
|
272 |
} |
|
273 |
} |
|
274 |
} |
|
275 |
} |
|
276 |
|
|
277 |
|
|
278 |
// while (match != null) { |
|
279 |
// if (debug) System.out.println("Match: "+forest.getCurrentMatchNumber()+" Nsub="+forest.getSubMatchesSize()); |
|
280 |
// Display_Sentence sub = null; |
|
281 |
// int[] var_values = match.getSentenceSubmatchAt(sentno,i); |
|
282 |
// |
|
283 |
// sub = match; |
|
284 |
// int nlabel; |
|
285 |
// while (sub != null) { |
|
286 |
// if (debug) System.out.println(" Sub: "+forest.getCurrentSubMatchNumber()); |
|
287 |
// Node matchingNode = sub.getMatchSubgraphNode(); |
|
288 |
// //if (debug) System.out.println(matchingNode.getIncomingEdgeLabel()); |
|
289 |
// if (matchingNode instanceof T_Node) { |
|
290 |
// T_Node node = (T_Node)matchingNode; |
|
291 |
// if (debug) System.out.println(" T node: "+node.getFeature(EDITIONID)+" "+node.getFeature("word")); |
|
292 |
// matchingTnodesID.add(node.getFeature(EDITIONID)); |
|
293 |
// } else if (matchingNode instanceof NT_Node) { |
|
294 |
// NT_Node ntnode = (NT_Node)matchingNode; |
|
295 |
// if (debug) System.out.println(" NT node: "+ntnode.getChilds()); |
|
296 |
// for (T_Node node : getTerminals(ntnode, sub)) { |
|
297 |
// if (debug) System.out.println(" T node: "+node.getFeature(EDITIONID)+" "+node.getFeature("word")); |
|
298 |
// matchingTnodesID.add(node.getFeature(EDITIONID)); |
|
299 |
// } |
|
300 |
// } |
|
301 |
// |
|
302 |
// if (forest.isNextSubMatch()) { |
|
303 |
// sub = forest.nextSubMatch(); |
|
304 |
// } else { |
|
305 |
// sub = null; |
|
306 |
// } |
|
307 |
// } |
|
308 |
// |
|
309 |
// if (forest.isNextMatch()) { |
|
310 |
// match = forest.nextMatch(); |
|
311 |
// } else { |
|
312 |
// match = null; |
|
313 |
// } |
|
314 |
// } |
|
315 |
|
|
316 |
HashSet<String> matchingTnodesIDFiltered = new HashSet<String>(); |
|
317 |
for (String s : matchingTnodesID) { |
|
318 |
if (s == null || s.length() == 0 || s.endsWith("_dupl")) { |
|
319 |
|
|
320 |
} else { |
|
321 |
matchingTnodesIDFiltered.add(s); |
|
322 |
} |
|
323 |
} |
|
324 |
|
|
325 |
if (debug) System.out.println("Matching ids: "+matchingTnodesIDFiltered); |
|
326 |
String[] strings = new String[matchingTnodesIDFiltered.size()]; |
|
327 |
int i = 0; |
|
328 |
for (String s : matchingTnodesIDFiltered) strings[i++] = s; |
|
329 |
matchingTnodesIDFiltered = null; |
|
330 |
|
|
331 |
matches = buildMatches_CQI(strings); |
|
332 |
strings = null; |
|
333 |
|
|
334 |
// one word per match ! the word is inside one of the CQP (sub-)Corpus matches |
|
335 |
List<Match> finalMatches = new ArrayList<Match>(); |
|
336 |
List<Match> corpusMatches = corp.getMatches(); |
|
337 |
int iCorpusStart = 0; |
|
338 |
for (int iMatch = 0 ; iMatch < matches.size() ; iMatch++) { |
|
339 |
Match m1 = matches.get(iMatch); |
|
340 |
for (int iCorpus = iCorpusStart ; iCorpus < corpusMatches.size() ; iCorpus++) { |
|
341 |
Match m2 = corpusMatches.get(iCorpus); |
|
342 |
if (m2.getStart() <= m1.getStart() && m1.getEnd() <= m2.getEnd()) { |
|
343 |
finalMatches.add(m1); |
|
344 |
iCorpusStart = iCorpus; // optimizing ! \o/ |
|
345 |
break; |
|
346 |
} |
|
347 |
} |
|
348 |
} |
|
349 |
matches = null; |
|
350 |
corpusMatches = null; |
|
351 |
return finalMatches; |
|
352 |
|
|
353 |
} |
|
354 |
|
|
355 |
private ArrayList<Match> buildMatches_CQI(String[] strings) throws CqiClientException, IOException, CqiServerError { |
|
356 |
boolean debug = false; |
|
357 |
ArrayList<Match> matches = new ArrayList<Match>(); |
|
358 |
AbstractCqiClient CQI = CQPEngine.getCqiClient(); |
|
359 |
Property idProperty = getCorpus().getProperty("id"); |
|
360 |
|
|
361 |
if (debug) System.out.println("Call CQI.str2Id with strings.len="+strings.length); |
|
362 |
if (debug) System.out.println(Arrays.toString(strings)); |
|
363 |
if (debug) System.out.flush(); |
|
364 |
|
|
365 |
int[] ids = CQI.str2Id(idProperty.getQualifiedName(), strings); |
|
366 |
|
|
367 |
if (debug) System.out.println("Call CQI.idList2Cpos with strings.len="+ids.length); |
|
368 |
if (debug) System.out.println(Arrays.toString(ids)); |
|
369 |
if (debug) System.out.flush(); |
|
370 |
|
|
371 |
HashSet<Integer> positionsSet = new HashSet<Integer>(); |
|
372 |
for (int id : ids) { |
|
373 |
int[] positions = CQI.id2Cpos(idProperty.getQualifiedName(), id); |
|
374 |
for( int p : positions) positionsSet.add(p); |
|
375 |
} |
|
376 |
|
|
377 |
int[] positions = new int[positionsSet.size()]; |
|
378 |
int i = 0; |
|
379 |
for (int p : positionsSet) positions[i++] = p; |
|
380 |
positionsSet = null; |
|
381 |
Arrays.sort(positions); |
|
382 |
if (debug) System.out.println("Positions: "+Arrays.toString(positions)); |
|
383 |
|
|
384 |
for (int p : positions) matches.add(new Match(p, p, -1)); |
|
385 |
|
|
386 |
//build matches and regroups matches |
|
387 |
// int previous = -999; |
|
388 |
// int start = -1, end = -1, target = -1; |
|
389 |
// for (int p : positions) { |
|
390 |
// //System.out.println("P="+p+" start="+start+" end="+end+" target="+target+" Prev="+previous); |
|
391 |
// if (p - previous > 1) { |
|
392 |
// if (start >= 0) { |
|
393 |
// matches.add(new Match(start, end, target)); |
|
394 |
// } |
|
395 |
// start = p; |
|
396 |
// } |
|
397 |
// end = p; |
|
398 |
// previous = p; |
|
399 |
// } |
|
400 |
// System.out.println(matches); |
|
401 |
return matches; |
|
402 |
} |
|
403 |
|
|
404 |
|
|
405 |
protected static ArrayList<T_Node> EMPTYTERMINALS = new ArrayList<T_Node>(); |
|
406 |
public static ArrayList<T_Node> getTerminals(NT_Node ntnode, Sentence sub) { |
|
407 |
if (ntnode.getChilds().size() == 0) return EMPTYTERMINALS; |
|
408 |
|
|
409 |
ArrayList<T_Node> terminals = new ArrayList<T_Node>(); |
|
410 |
for (Object o : ntnode.getChilds()) { |
|
411 |
int n = (Integer)o; |
|
412 |
Node node = sub.getNode(n); |
|
413 |
if (node instanceof T_Node) { |
|
414 |
terminals.add((T_Node)node); |
|
415 |
} else if (node instanceof NT_Node) { |
|
416 |
terminals.addAll(getTerminals((NT_Node)node, sub)); |
|
417 |
} |
|
418 |
} |
|
419 |
return terminals; |
|
420 |
} |
|
421 |
} |
|
0 | 422 |
tmp/org.txm.tigersearch.rcp/src/org/txm/function/tigersearch/TIGERSearch.java (revision 458) | ||
---|---|---|
1 |
package org.txm.function.tigersearch; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.util.Arrays; |
|
5 |
import java.util.List; |
|
6 |
|
|
7 |
import org.eclipse.core.runtime.IAdaptable; |
|
8 |
import org.eclipse.jface.resource.ImageDescriptor; |
|
9 |
import org.eclipse.ui.model.IWorkbenchAdapter; |
|
10 |
import org.eclipse.ui.plugin.AbstractUIPlugin; |
|
11 |
import org.txm.functions.ProgressWatcher; |
|
12 |
import org.txm.functions.TXMCommand; |
|
13 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
14 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
15 |
import org.txm.searchengine.ts.TSCorpus; |
|
16 |
import org.txm.searchengine.ts.TSCorpusManager; |
|
17 |
import org.txm.searchengine.ts.TSMatch; |
|
18 |
import org.txm.searchengine.ts.TSResult; |
|
19 |
import org.txm.tigersearch.commands.ComputeTIGERSearch; |
|
20 |
import org.txm.utils.logger.Log; |
|
21 |
|
|
22 |
public class TIGERSearch extends TXMCommand implements IAdaptable { |
|
23 |
|
|
24 |
TSCorpusManager manager; |
|
25 |
TSCorpus tscorpus; |
|
26 |
boolean ready = false; |
|
27 |
private TSResult tsresult; |
|
28 |
String T, NT; |
|
29 |
private MainCorpus corpus; |
|
30 |
private String query; |
|
31 |
|
|
32 |
public TIGERSearch(Corpus corpus) { |
|
33 |
this.corpus = corpus.getMainCorpus(); |
|
34 |
|
|
35 |
String id = corpus.getName(); |
|
36 |
File configfile = new File(corpus.getBaseDirectory(),"tiger/tigersearch.logprop"); |
|
37 |
File registrydir = new File(corpus.getBaseDirectory(), "tiger"); |
|
38 |
File tscorpusdir = new File(corpus.getBaseDirectory(), "tiger/"+id); |
|
39 |
|
|
40 |
if (!tscorpusdir.exists()) { |
|
41 |
System.out.println("Error: can't find corpus directory: "+tscorpusdir); |
|
42 |
return; |
|
43 |
} |
|
44 |
|
|
45 |
TSCorpusManager manager = new TSCorpusManager(registrydir, configfile); |
|
46 |
if (manager.isInitialized()) { |
|
47 |
|
|
48 |
tscorpus = manager.getCorpus(id); |
|
49 |
if (tscorpus == null) { |
|
50 |
System.out.println("TIGERSearch corpus not found in "+registrydir); |
|
51 |
ready = false; |
|
52 |
|
|
53 |
return; |
|
54 |
} |
|
55 |
|
|
56 |
ready = true; |
|
57 |
} |
|
58 |
} |
|
59 |
|
|
60 |
public boolean compute(String query) throws Exception { |
|
61 |
if (tscorpus == null) return false; |
|
62 |
tsresult = null; |
|
63 |
if (query.length() == 0) { |
|
64 |
return false; |
|
65 |
} |
|
66 |
this.query = query; |
|
67 |
tsresult = tscorpus.query(query); |
|
68 |
tsresult.getFirst(); |
|
69 |
return true; |
|
70 |
} |
|
71 |
|
|
72 |
public boolean isComputed() { |
|
73 |
return tsresult != null; |
|
74 |
} |
|
75 |
|
|
76 |
public boolean isDrawn() { |
|
77 |
return tsresult!= null && T != null && NT != null; |
|
78 |
} |
|
79 |
|
|
80 |
public boolean toSVG(File svgFile, int sent, int sub, String T, String NT) { |
|
81 |
if (tsresult == null) return false; // no result |
|
82 |
this.T = T; |
|
83 |
this.NT = NT; |
|
84 |
|
|
85 |
tsresult.setDisplayProperties(Arrays.asList(T), NT); |
|
86 |
|
|
87 |
try { |
|
88 |
// iterate to the sub th subgraph |
|
89 |
TSMatch match = tsresult.getMatch(sent); |
|
90 |
match.setSubGraph(sub-1); |
|
91 |
svgFile.delete(); |
|
92 |
match.toSVGFile(svgFile); |
|
93 |
if (!svgFile.exists()) { |
|
94 |
System.out.println("Fail to render SVG match for sent="+sent+" and sub="+sub); |
|
95 |
return false; |
|
96 |
} |
|
97 |
} catch(Exception e) { |
|
98 |
Log.printStackTrace(e); |
|
99 |
} |
|
100 |
return true; |
|
101 |
} |
|
102 |
|
|
103 |
public String[] getTProperties() { |
|
104 |
if (tscorpus == null) return new String[0]; |
|
105 |
List<String> tFeatures = tscorpus.getTFeatures(); |
|
106 |
return tFeatures.toArray(new String[tFeatures.size()]); |
|
107 |
} |
|
108 |
|
|
109 |
public String[] getNTProperties() { |
|
110 |
if (tscorpus == null) return new String[0]; |
|
111 |
List<String> ntFeatures = tscorpus.getNTFeatures(); |
|
112 |
return ntFeatures.toArray(new String[ntFeatures.size()]); |
|
113 |
} |
|
114 |
|
|
115 |
public Corpus getParent() { |
|
116 |
return corpus; |
|
117 |
} |
|
118 |
|
|
119 |
public boolean isReady() { |
|
120 |
return ready; |
|
121 |
} |
|
122 |
|
|
123 |
public String getT() { |
|
124 |
return T; |
|
125 |
} |
|
126 |
|
|
127 |
public String getNT() { |
|
128 |
return NT; |
|
129 |
} |
|
130 |
|
|
131 |
public int getNSentences() { |
|
132 |
if (tsresult == null) return 0; |
|
133 |
return tsresult.getNumberOfMatch(); |
|
134 |
} |
|
135 |
|
|
136 |
public int getNSubGraph() { |
|
137 |
if (tsresult == null) return 0; |
|
138 |
return tsresult.getCurrentMatch().getNumberOfSubGraph(); |
|
139 |
} |
|
140 |
|
|
141 |
public int getSent() { |
|
142 |
if (tsresult == null) return 0; |
|
143 |
return tsresult.getCurrentMatchNo(); |
|
144 |
} |
|
145 |
|
|
146 |
public int getSub() { |
|
147 |
if (tsresult == null) return 0; |
|
148 |
return tsresult.getCurrentMatch().getCurrentSubMatchNo(); |
|
149 |
} |
|
150 |
|
|
151 |
/** The WordCloud adapter. */ |
|
152 |
private static IWorkbenchAdapter tsAdapter = new IWorkbenchAdapter() { |
|
153 |
|
|
154 |
@Override |
|
155 |
public Object[] getChildren(Object o) { |
|
156 |
return new Object[0]; |
|
157 |
} |
|
158 |
|
|
159 |
@Override |
|
160 |
public ImageDescriptor getImageDescriptor(Object object) { |
|
161 |
return AbstractUIPlugin.imageDescriptorFromPlugin("TIGERSearchRCP", "icons/functions/TS.png"); //$NON-NLS //$NON-NLS-1$ |
|
162 |
} |
|
163 |
|
|
164 |
@Override |
|
165 |
public String getLabel(Object o) { |
|
166 |
String q = ((TIGERSearch) o).getQuery(); |
|
167 |
return q.substring(0, Math.min(10, q.length())).replaceAll("\n", "")+"..."; |
|
168 |
} |
|
169 |
|
|
170 |
@Override |
|
171 |
public Object getParent(Object o) { |
|
172 |
return ((TIGERSearch) o).getParent(); |
|
173 |
} |
|
174 |
}; |
|
175 |
|
|
176 |
|
|
177 |
@Override |
|
178 |
public Object getAdapter(@SuppressWarnings("rawtypes") Class adapterType) { |
|
179 |
if (adapterType == IWorkbenchAdapter.class) |
|
180 |
return tsAdapter; |
|
181 |
return null; |
|
182 |
} |
|
183 |
|
|
184 |
public String getQuery() { |
|
185 |
return query; |
|
186 |
} |
|
187 |
|
|
188 |
public String toString() { |
|
189 |
return query.substring(0, Math.min(20, query.length())).replaceAll("\n", " ")+" T: "+T+" NT: "+NT; |
|
190 |
} |
|
191 |
|
|
192 |
public boolean openEditor() { |
|
193 |
ComputeTIGERSearch.openEditor(this); |
|
194 |
return true; |
|
195 |
} |
|
196 |
|
|
197 |
@Override |
|
198 |
public boolean toTxt(File outfile, String encoding, String colseparator, |
|
199 |
String txtseparator) throws Exception { |
|
200 |
return tsresult.toXml(outfile); |
|
201 |
} |
|
202 |
|
|
203 |
@Override |
|
204 |
public void clean() { |
|
205 |
|
|
206 |
} |
|
207 |
|
|
208 |
@Override |
|
209 |
public boolean delete() { |
|
210 |
return corpus.removeResult(this); |
|
211 |
} |
|
212 |
|
|
213 |
/** |
|
214 |
* |
|
215 |
* @return the array of extensions to show in the FileDialog SWT widget |
|
216 |
*/ |
|
217 |
public String[] getExportTXTExtensions() { |
|
218 |
return new String[]{"*.xml"}; |
|
219 |
} |
|
220 |
|
|
221 |
@Override |
|
222 |
public boolean compute(ProgressWatcher watcher) throws Exception { |
|
223 |
// TODO Auto-generated method stub |
|
224 |
return false; |
|
225 |
} |
|
226 |
|
|
227 |
@Override |
|
228 |
public String getName() { |
|
229 |
return query; |
|
230 |
} |
|
231 |
|
|
232 |
@Override |
|
233 |
public String getSimpleName() { |
|
234 |
return query; |
|
235 |
} |
|
236 |
|
|
237 |
@Override |
|
238 |
public String getDetails() { |
|
239 |
return query; |
|
240 |
} |
|
241 |
} |
|
0 | 242 |
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSResult.java (revision 458) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.searchengine.ts; |
|
29 |
|
|
30 |
import ims.tiger.corpus.Header; |
|
31 |
import ims.tiger.export.ExportException; |
|
32 |
import ims.tiger.export.ExportStopException; |
|
33 |
import ims.tiger.export.QueryToSvg; |
|
34 |
import ims.tiger.gui.tigergraphviewer.forest.ResultForest; |
|
35 |
import ims.tiger.query.api.MatchResult; |
|
36 |
import ims.tiger.query.api.QueryEvaluationException; |
|
37 |
import ims.tiger.query.api.QueryFilterException; |
|
38 |
import ims.tiger.query.api.QueryIndexException; |
|
39 |
import ims.tiger.query.api.QueryNormalizationException; |
|
40 |
import ims.tiger.query.api.QueryOptimizationException; |
|
41 |
import ims.tiger.query.internalapi.InternalCorpusQueryManager; |
|
42 |
|
|
43 |
import java.io.File; |
|
44 |
import java.io.FileNotFoundException; |
|
45 |
import java.io.IOException; |
|
46 |
import java.io.UnsupportedEncodingException; |
|
47 |
import java.util.ArrayList; |
|
48 |
import java.util.Arrays; |
|
49 |
import java.util.List; |
|
50 |
|
|
51 |
import javax.xml.parsers.ParserConfigurationException; |
|
52 |
import javax.xml.transform.TransformerException; |
|
53 |
|
|
54 |
import org.txm.core.messages.TXMCoreMessages; |
|
55 |
import org.txm.core.preferences.TBXPreferences; |
|
56 |
import org.txm.core.preferences.TXMPreferences; |
|
57 |
import org.txm.export.ts.ConcordanceBlocks; |
|
58 |
import org.txm.export.ts.MatchInject; |
|
59 |
import org.txm.export.ts.PunctInject; |
|
60 |
import org.txm.importer.ApplyXsl2; |
|
61 |
import org.txm.utils.logger.Log; |
|
62 |
import org.txm.utils.xml.DomUtils; |
|
63 |
import org.w3c.dom.Document; |
|
64 |
import org.xml.sax.SAXException; |
|
65 |
|
|
66 |
// TODO: Auto-generated Javadoc |
|
67 |
/** |
|
68 |
* The Class TSResult. |
|
69 |
*/ |
|
70 |
public class TSResult { |
|
71 |
|
|
72 |
/** The result. */ |
|
73 |
MatchResult result; |
|
74 |
|
|
75 |
/** The forest. */ |
|
76 |
ResultForest forest; |
|
77 |
|
|
78 |
/** The header. */ |
|
79 |
Header header; |
|
80 |
|
|
81 |
/** The ts corpus. */ |
|
82 |
TSCorpus tsCorpus; |
|
83 |
|
|
84 |
/** The matches. */ |
|
85 |
TSMatch[] matches; |
|
86 |
|
|
87 |
/** The current match. */ |
|
88 |
int currentMatchNo = 0; |
|
89 |
TSMatch currentMatch; |
|
90 |
|
|
91 |
/** The query. */ |
|
92 |
private String query; |
|
93 |
|
|
94 |
/** The manager. */ |
|
95 |
private InternalCorpusQueryManager manager; |
|
96 |
|
|
97 |
/** The corpus. */ |
|
98 |
//private TSCorpus corpus; |
|
99 |
|
|
100 |
/** The tsquerytosvg. */ |
|
101 |
QueryToSvg tsquerytosvg = null; |
|
102 |
|
|
103 |
/**
 * Runs a query on a TIGERSearch corpus and keeps everything needed to browse
 * and render its matches: the match result, the result forest, its header,
 * an (initially empty) cache with one slot per match and the SVG renderer.
 *
 * @param query the query
 * @param tsCorpus the ts corpus
 * @throws Exception if the query cannot be processed or the result cannot be prepared
 */
public TSResult(String query, TSCorpus tsCorpus) throws Exception {

	this.tsCorpus = tsCorpus;
	this.query = query;
	this.manager = tsCorpus.manager;

	this.result = tsCorpus.manager.processQuery(query);
	this.forest = new ResultForest(this.result, tsCorpus.manager);
	this.header = this.forest.getHeader();

	// TSMatch objects are created on demand by getMatch
	this.matches = new TSMatch[this.result.size()];
	this.tsquerytosvg = new QueryToSvg(this.manager, this.result, this.forest, this.header, tsCorpus.config);
}
|
129 |
|
|
130 |
public ResultForest getForest() { |
|
131 |
return forest; |
|
132 |
} |
|
133 |
|
|
134 |
public MatchResult getMatchResult() { |
|
135 |
return result; |
|
136 |
} |
|
137 |
|
|
138 |
public InternalCorpusQueryManager getManager() { |
|
139 |
return manager; |
|
140 |
} |
|
141 |
|
|
142 |
/** |
|
143 |
* Gets the number of match. |
|
144 |
* |
|
145 |
* @return the number of match |
|
146 |
*/ |
|
147 |
public int getNumberOfMatch() |
|
148 |
{ |
|
149 |
return result.size(); |
|
150 |
} |
|
151 |
|
|
152 |
/** |
|
153 |
* return the no of match, begins with 1. |
|
154 |
* |
|
155 |
* @return the current match no |
|
156 |
*/ |
|
157 |
public int getCurrentMatchNo() { |
|
158 |
return currentMatchNo; |
|
159 |
} |
|
160 |
|
|
161 |
/** |
|
162 |
* return the no of match, begins with 1. |
|
163 |
* |
|
164 |
* @return the current match no |
|
165 |
*/ |
|
166 |
public TSMatch getCurrentMatch() { |
|
167 |
return currentMatch; |
|
168 |
} |
|
169 |
|
|
170 |
/** |
|
171 |
* return the no of match, begins with 1. |
|
172 |
* |
|
173 |
* @return the current sentence no |
|
174 |
*/ |
|
175 |
public int getCurrentSentenceNo() { |
|
176 |
System.out.println(TXMCoreMessages.TSResult_0+this.getNumberOfMatch()); |
|
177 |
System.out.println("current match "+this.getCurrentMatchNo()); //$NON-NLS-1$ |
|
178 |
System.out.println(TXMCoreMessages.TSResult_2+this.getCurrentMatch().getNumberOfSubGraph()); |
|
179 |
System.out.println("current sub graph "+this.getCurrentMatch().getCurrentSubMatchNo()); //$NON-NLS-1$ |
|
180 |
return result.getSentenceNumberAt(this.currentMatchNo); // + 1 ? |
|
181 |
} |
|
182 |
|
|
183 |
/** |
|
184 |
* Gets the match. |
|
185 |
* |
|
186 |
* @param matchNo the no |
|
187 |
* @return the match |
|
188 |
*/ |
|
189 |
public TSMatch getMatch(int matchNo) |
|
190 |
{ |
|
191 |
if (matches[matchNo] == null) { |
|
192 |
TSMatch m = new TSMatch(matchNo, this.tsquerytosvg, this); |
|
193 |
matches[matchNo] = m; |
|
194 |
} |
|
195 |
|
|
196 |
return matches[matchNo] ; |
|
197 |
} |
|
198 |
|
|
199 |
public void setDisplayProperties(List<String> tprops, String feature) { |
|
200 |
tsCorpus.setDisplayProperties(this.header, tprops, feature); |
|
201 |
} |
|
202 |
|
|
203 |
/** |
|
204 |
* Gets the first. |
|
205 |
* |
|
206 |
* @return the first |
|
207 |
*/ |
|
208 |
public TSMatch getFirst() |
|
209 |
{ |
|
210 |
if (result.size() > 0) { |
|
211 |
currentMatchNo = 0; |
|
212 |
currentMatch = getMatch(0); |
|
213 |
return currentMatch; |
|
214 |
} else { |
|
215 |
return null; |
|
216 |
} |
|
217 |
} |
|
218 |
|
|
219 |
/** |
|
220 |
* Gets the last. |
|
221 |
* |
|
222 |
* @return the last |
|
223 |
*/ |
|
224 |
public TSMatch getLast() |
|
225 |
{ |
|
226 |
if (result.size() > 0) { |
|
227 |
currentMatchNo = result.size() -1; |
|
228 |
currentMatch = getMatch(result.size() -1); |
|
229 |
return currentMatch; |
|
230 |
} else { |
|
231 |
return null; |
|
232 |
} |
|
233 |
} |
|
234 |
|
|
235 |
/** |
|
236 |
* Gets the next. |
|
237 |
* |
|
238 |
* @return the next |
|
239 |
*/ |
|
240 |
public TSMatch getNext() |
|
241 |
{ |
|
242 |
int next = currentMatchNo + 1; |
|
243 |
if (result.size() > next) { |
|
244 |
currentMatchNo = next; |
|
245 |
currentMatch = getMatch(next); |
|
246 |
return currentMatch; |
|
247 |
} else { |
|
248 |
return null; |
|
249 |
} |
|
250 |
} |
|
251 |
|
|
252 |
/** |
|
253 |
* Gets the previous. |
|
254 |
* |
|
255 |
* @return the previous |
|
256 |
*/ |
|
257 |
public TSMatch getPrevious() |
|
258 |
{ |
|
259 |
int next = currentMatchNo - 1; |
|
260 |
if (next >= 0 && result.size() > 0) { |
|
261 |
currentMatchNo = next; |
|
262 |
currentMatch = getMatch(next); |
|
263 |
return currentMatch; |
|
264 |
} else { |
|
265 |
return null; |
|
266 |
} |
|
267 |
} |
|
268 |
|
|
269 |
public TSMatch setCurrentMatch(int graphNo) { |
|
270 |
currentMatch = this.getMatch(graphNo); |
|
271 |
currentMatchNo = graphNo; |
|
272 |
return currentMatch; |
|
273 |
} |
|
274 |
|
|
275 |
public boolean toXml(File outfile, File xmlFile, File xslFile) throws ExportException, ExportStopException, IOException, TransformerException |
|
276 |
{ |
|
277 |
return toXml(outfile, xmlFile, xslFile, false, 30, new ArrayList<String>(), new ArrayList<String>()); |
|
278 |
} |
|
279 |
|
|
280 |
public static String CONCSIMPLE = "concordance_simple"; //$NON-NLS-1$ |
|
281 |
public static String CONCMOTPIVOT = "concordance_mot-pivot"; //$NON-NLS-1$ |
|
282 |
public static String CONCBLOCKS = "concordance_blocks"; //$NON-NLS-1$ |
|
283 |
public static String[] EXPORTMETHODS = {CONCSIMPLE, CONCMOTPIVOT, CONCBLOCKS}; |
|
284 |
public boolean toConcordance(File csvFile, String method, int cx, ArrayList<String> ntTypes, ArrayList<String> tTypes, boolean punct) throws Exception |
|
285 |
{ |
|
286 |
if (!Arrays.asList(EXPORTMETHODS).contains(method)) { |
|
287 |
Log.severe(TXMCoreMessages.TSResult_7+method+TXMCoreMessages.TSResult_8+Arrays.toString(EXPORTMETHODS)); |
|
288 |
return false; |
|
289 |
} |
|
290 |
|
|
291 |
File xmlFile = File.createTempFile(csvFile.getName(), "EXPORTBRUT.xml", csvFile.getParentFile()); //$NON-NLS-1$ |
|
292 |
boolean rez = false; |
|
293 |
if (punct) { |
|
294 |
// export match |
|
295 |
//System.out.println("save matches in "+xmlFile); |
|
296 |
this.toXml(xmlFile, false, true, false, false, true, false, 0); |
|
297 |
//FileCopy.copy(xmlFile, new File(xmlFile.getParentFile(), "EXPORTBRUT.xml")); |
|
298 |
// merge with TigerXMLPOSPNC |
|
299 |
File tmp = File.createTempFile("txm", "AFTERMINJECT.xml", xmlFile.getParentFile()); //$NON-NLS-1$ //$NON-NLS-2$ |
|
300 |
File tigerXml = new File(tsCorpus.tsmanager.getRegistryPath(), "TigerPnc.xml"); //$NON-NLS-1$ |
|
301 |
//System.out.println("TIGER XML: "+tigerXml); |
|
302 |
if (!tigerXml.exists()) { |
|
303 |
System.out.println(TXMCoreMessages.TSResult_13+tigerXml.getAbsolutePath()); |
|
304 |
return false; |
|
305 |
} |
|
306 |
//System.out.println("Match inject: in "+tmp); |
|
307 |
new MatchInject().script(tigerXml, xmlFile, tmp); |
|
308 |
xmlFile.delete(); |
|
309 |
tmp.renameTo(xmlFile); |
|
310 |
//FileCopy.copy(xmlFile, new File(xmlFile.getParentFile(), "AFTERMINJECT.xml")); |
|
311 |
} else { |
|
312 |
this.toXml(xmlFile); // export match + corpus |
|
313 |
//FileCopy.copy(xmlFile, new File(xmlFile.getParentFile(), "FULLEXPORT.xml")); |
|
314 |
} |
|
315 |
|
|
316 |
if (!xmlFile.exists()) { System.out.println(TXMCoreMessages.TSResult_14); return false;} |
|
317 |
|
|
318 |
if (method.equals("concordance_blocks")) { //$NON-NLS-1$ |
|
319 |
ConcordanceBlocks builder = new ConcordanceBlocks(); |
|
320 |
rez = builder.process(xmlFile, csvFile, cx, ntTypes, tTypes); |
|
321 |
} else { // XSL method |
|
322 |
File xslDir = new File(TXMPreferences.getString(TBXPreferences.USER_TXM_HOME, TBXPreferences.PREFERENCES_NODE), "xsl"); //$NON-NLS-1$ |
|
323 |
File xslFile = new File(xslDir, method+".xsl"); //$NON-NLS-1$ |
|
324 |
if (!xslFile.exists()) { |
|
325 |
Log.severe(TXMCoreMessages.TSResult_7+xslFile); |
|
326 |
return false; |
|
327 |
} |
|
328 |
|
|
329 |
rez = toXml(csvFile, xmlFile, xslFile, punct, cx, ntTypes, tTypes); |
|
330 |
} |
|
331 |
xmlFile.delete(); // no more needed |
|
332 |
return rez; |
|
333 |
} |
|
334 |
|
|
335 |
public boolean toXml(File outfile) throws ExportException, ExportStopException |
|
336 |
{ |
|
337 |
toXml(outfile, true, true); |
|
338 |
return true; |
|
339 |
} |
|
340 |
|
|
341 |
public boolean toXml(File outfile, boolean includeNonMatch, boolean includeMatch, |
|
342 |
boolean includeXmlHeader, boolean includeXMLSentenceStructure, |
|
343 |
boolean includeXMLMatchInformation, boolean refineSchema, int referSchema) throws ExportException, ExportStopException |
|
344 |
{ |
|
345 |
this.tsCorpus.exporter.setConfiguration(includeXmlHeader, |
|
346 |
includeXMLSentenceStructure, |
|
347 |
includeXMLMatchInformation, |
|
348 |
refineSchema, |
|
349 |
referSchema); |
|
350 |
this.tsCorpus.exporter.saveMatchAsXML(result, outfile, includeNonMatch, includeMatch); |
|
351 |
return true; |
|
352 |
} |
|
353 |
|
|
354 |
public boolean toXml(File outfile, boolean includeNonMatch, boolean includeMatch) throws ExportException, ExportStopException |
|
355 |
{ |
|
356 |
boolean includeXmlHeader = true; |
|
357 |
boolean includeXMLSentenceStructure = true; |
|
358 |
boolean includeXMLMatchInformation = true; |
|
359 |
boolean refineSchema = false; |
|
360 |
int referSchema = 0; |
|
361 |
|
|
362 |
return toXml(outfile, includeNonMatch, includeMatch, includeXmlHeader, includeXMLSentenceStructure, includeXMLMatchInformation, refineSchema, referSchema); |
|
363 |
} |
|
364 |
|
|
365 |
@Deprecated |
|
366 |
private void injectPunct(File xmlfile) |
|
367 |
{ |
|
368 |
System.out.println(TXMCoreMessages.TSResult_18); |
|
369 |
String corpus = this.tsCorpus.id.toUpperCase(); |
|
370 |
try { |
|
371 |
Document dom = DomUtils.load(xmlfile); |
|
372 |
//System.out.println("Getting words of "+corpus); |
|
373 |
Object words = PunctInject.getWords(corpus, ""); //$NON-NLS-1$ |
|
374 |
|
|
375 |
// int i = 0; |
|
376 |
// for(String[] word : (ArrayList<String[]>)words) |
|
377 |
// { |
|
378 |
// if(i++ % 10 == 0) System.out.println(); |
|
379 |
// System.out.print("[\""+word[0]+"\", \""+word[1].replace("\"", "\\\"")+"\"], "); |
|
380 |
// } |
|
381 |
File outfile = File.createTempFile("punct", ".xml", xmlfile.getParentFile()); //$NON-NLS-1$ //$NON-NLS-2$ |
|
382 |
//System.out.println("Processing "+xmlfile+" to "+outfile); |
|
383 |
Document doc = (Document) new PunctInject().process(dom, words); |
|
384 |
//System.out.println("Saving file"); |
|
385 |
DomUtils.save(doc, outfile); |
|
386 |
|
|
387 |
// FileCopy.copy(outfile, new File(outfile.getParentFile(), "afterinject.xml")); |
|
388 |
|
|
389 |
xmlfile.delete(); |
|
390 |
outfile.renameTo(xmlfile); |
|
391 |
} catch (UnsupportedEncodingException e) { |
|
392 |
// TODO Auto-generated catch block |
|
393 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
394 |
} catch (FileNotFoundException e) { |
|
395 |
// TODO Auto-generated catch block |
|
396 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
397 |
} catch (ParserConfigurationException e) { |
|
398 |
// TODO Auto-generated catch block |
|
399 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
400 |
} catch (SAXException e) { |
|
401 |
// TODO Auto-generated catch block |
|
402 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
403 |
} catch (IOException e) { |
|
404 |
// TODO Auto-generated catch block |
|
405 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
406 |
} |
|
407 |
|
|
408 |
|
|
409 |
} |
|
410 |
|
|
411 |
public boolean toXml(File outFile, File xmlFile, File xslFile, boolean punct, int cxsize, ArrayList<String> ntTypes, ArrayList<String> tTypes) throws ExportException, ExportStopException, IOException, TransformerException |
|
412 |
{ |
|
413 |
ApplyXsl2 xslProc = new ApplyXsl2(xslFile); |
|
414 |
xslProc.setParam("cx", cxsize); //$NON-NLS-1$ |
|
415 |
xslProc.setParam("ntTypes", ntTypes.toString().replaceAll("[\\[\\],]", "")); // ["pos", "lem"] //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
416 |
xslProc.setParam("tTypes", tTypes.toString().replaceAll("[\\[\\],]", "")); // ["truc", "machin"] //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
|
417 |
if (xslProc.process(xmlFile, outFile)) { |
|
418 |
return outFile.exists(); |
|
419 |
} else { |
|
420 |
return false; |
|
421 |
} |
|
422 |
} |
|
423 |
} |
|
0 | 424 |
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSCmd.java (revision 458) | ||
---|---|---|
1 |
package org.txm.searchengine.ts; |
|
2 |
//Copyright © - ANR Textométrie - http://textometrie.ens-lyon.fr |
|
3 |
// |
|
4 |
//This file is part of the TXM platform. |
|
5 |
// |
|
6 |
//The TXM platform is free software: you can redistribute it and/or modify |
|
7 |
//it under the terms of the GNU General Public License as published by |
|
8 |
//the Free Software Foundation, either version 3 of the License, or |
|
9 |
//(at your option) any later version. |
|
10 |
// |
|
11 |
//The TXM platform is distributed in the hope that it will be useful, |
|
12 |
//but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
14 |
//GNU General Public License for more details. |
|
15 |
// |
|
16 |
//You should have received a copy of the GNU General Public License |
|
17 |
//along with the TXM platform. If not, see <http://www.gnu.org/licenses/>. |
|
18 |
// |
|
19 |
// @author Nicolas Mazziotta <nicolas.mazziotta -at- ulg.ac.be> |
|
20 |
// |
|
21 |
//$LastChangedDate: $ |
|
22 |
//$LastChangedRevision: $ |
|
23 |
//$LastChangedBy: $ |
|
24 |
import java.io.File; |
|
25 |
|
|
26 |
import org.apache.commons.cli.CommandLine; |
|
27 |
import org.apache.commons.cli.CommandLineParser; |
|
28 |
import org.apache.commons.cli.HelpFormatter; |
|
29 |
import org.apache.commons.cli.Options; |
|
30 |
import org.apache.commons.cli.ParseException; |
|
31 |
import org.apache.commons.cli.PosixParser; |
|
32 |
import org.txm.core.messages.TXMCoreMessages; |
|
33 |
|
|
34 |
/** |
|
35 |
* CommandLine to execute TIGERSearch |
|
36 |
* |
|
37 |
* @author Nicolas Mazziotta <nicolas.mazziotta -at- ulg.ac.be> |
|
38 |
* |
|
39 |
*/ |
|
40 |
public class TSCmd { |
|
41 |
|
|
42 |
/** |
|
43 |
* @param args |
|
44 |
* @throws Exception |
|
45 |
*/ |
|
46 |
|
|
47 |
public static void main(String[] args) throws Exception { |
|
48 |
Options options = new Options(); |
|
49 |
options.addOption("c", true, "config path (dflt: $HOME/TigerSearch/config/tigersearch.logprop)"); //$NON-NLS-1$ //$NON-NLS-2$ |
|
50 |
options.addOption("r", true, "registry path (dflt: $HOME/TigerSearch/TIGERCorpora)"); //$NON-NLS-1$ //$NON-NLS-2$ |
|
51 |
options.addOption("i", true, "corpus id (COMPULSORY)"); //$NON-NLS-1$ //$NON-NLS-2$ |
|
52 |
options.addOption("q", true, "query string (dflt: [])"); //$NON-NLS-1$ //$NON-NLS-2$ |
|
53 |
options.addOption("o", true, "output path (dflt: ."); //$NON-NLS-1$ //$NON-NLS-2$ |
|
54 |
options.addOption("h", false, "print help and exit"); //$NON-NLS-1$ //$NON-NLS-2$ |
|
55 |
CommandLineParser parser = new PosixParser(); |
|
56 |
CommandLine cmd = null; |
|
57 |
HelpFormatter formatter = new HelpFormatter(); |
|
58 |
|
|
59 |
try { |
|
60 |
cmd = parser.parse(options, args); |
|
61 |
} catch (ParseException e) { |
|
62 |
// TODO Auto-generated catch block |
|
63 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
64 |
} |
|
65 |
String homedir = System.getenv("HOME"); //$NON-NLS-1$ |
|
66 |
File configdir = new File(homedir, "TigerSearch/config/tigersearch.logprop"); //$NON-NLS-1$ |
|
67 |
File registrydir = new File(homedir, "TigerSearch/TIGERCorpora"); //$NON-NLS-1$ |
|
68 |
String outdir = "."; //$NON-NLS-1$ |
|
69 |
String id = ""; //$NON-NLS-1$ |
|
70 |
String query = "[]"; //$NON-NLS-1$ |
|
71 |
if (cmd.hasOption("h")) { //$NON-NLS-1$ |
|
72 |
formatter.printHelp( "TSCmd -i Corpus Id [OTHER OPTIONS]", options ); //$NON-NLS-1$ |
|
73 |
return; |
|
74 |
} |
|
75 |
if (cmd.hasOption("c")) { configdir = new File(cmd.getOptionValue("c")); } //$NON-NLS-1$ //$NON-NLS-2$ |
|
76 |
if (cmd.hasOption("r")) { registrydir = new File(cmd.getOptionValue("r")); } //$NON-NLS-1$ //$NON-NLS-2$ |
|
77 |
if (cmd.hasOption("i")) { id = cmd.getOptionValue("i"); } else { //$NON-NLS-1$ //$NON-NLS-2$ |
|
78 |
formatter.printHelp( "TSCmd", options ); //$NON-NLS-1$ |
|
79 |
return; |
|
80 |
} |
|
81 |
if (cmd.hasOption("q")) { query = cmd.getOptionValue("q");} //$NON-NLS-1$ //$NON-NLS-2$ |
|
82 |
if (cmd.hasOption("o")) { outdir = cmd.getOptionValue("o"); } //$NON-NLS-1$ //$NON-NLS-2$ |
|
83 |
|
|
84 |
|
|
85 |
TSCorpusManager manager = new TSCorpusManager(registrydir, configdir); |
|
86 |
|
|
87 |
if (manager.isInitialized()) { |
|
88 |
|
|
89 |
TSCorpus corpus = manager.getCorpus(id); |
|
90 |
TSResult result = corpus.query(query); |
|
91 |
|
|
92 |
System.out.println(TXMCoreMessages.TSCmd_30+ query); |
|
93 |
System.out.println(String.valueOf(result.getNumberOfMatch())+ TXMCoreMessages.TSCmd_31); |
|
94 |
|
|
95 |
// write SVG of all matches |
|
96 |
for (int i = 0 ; i < result.getNumberOfMatch() ; i++) { |
|
97 |
File svgfile = new File(outdir, "result"+String.valueOf(i)+".svg"); //$NON-NLS-1$ //$NON-NLS-2$ |
|
98 |
result.getMatch(i).toSVGFile(svgfile); |
|
99 |
} |
|
100 |
|
|
101 |
} |
|
102 |
|
|
103 |
return; |
|
104 |
|
|
105 |
} |
|
106 |
} |
|
0 | 107 |
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSCorpusManager.java (revision 458) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.searchengine.ts; |
|
29 |
|
|
30 |
import ims.tiger.gui.tigergraphviewer.TIGERGraphViewerConfiguration; |
|
31 |
|
|
32 |
import java.io.File; |
|
33 |
import java.util.HashMap; |
|
34 |
|
|
35 |
import org.txm.core.messages.TXMCoreMessages; |
|
36 |
|
|
37 |
|
|
38 |
// TODO: Auto-generated Javadoc |
|
39 |
/** |
|
40 |
* The Class TSCorpusManager. |
|
41 |
*/ |
|
42 |
public class TSCorpusManager { |
|
43 |
|
|
44 |
/** The corpora. */ |
|
45 |
HashMap<String, TSCorpus> corpora = new HashMap<String, TSCorpus>(); |
|
46 |
|
|
47 |
/** The registrydir. */ |
|
48 |
private File registrydir; |
|
49 |
|
|
50 |
/** The configdir. */ |
|
51 |
private File configdir; |
|
52 |
|
|
53 |
/** The initok. */ |
|
54 |
private boolean initok = false; |
|
55 |
|
|
56 |
/** |
|
57 |
* Instantiates a new tS corpus manager. |
|
58 |
* |
|
59 |
* @param registrydir the registrydir |
|
60 |
* @param configdir the configdir |
|
61 |
*/ |
|
62 |
public TSCorpusManager(File registrydir, File configdir) |
|
63 |
{ |
|
64 |
this.registrydir = registrydir; |
|
65 |
this.configdir = configdir; |
|
66 |
|
|
67 |
initok = true; |
|
68 |
|
|
69 |
if (!(registrydir.exists() && registrydir.canRead())) { |
|
70 |
System.out.println(TXMCoreMessages.TSCorpusManager_0+registrydir); |
|
71 |
initok = false; |
|
72 |
} |
|
73 |
if (!(configdir.exists() && configdir.canRead())) { |
|
74 |
System.out.println(TXMCoreMessages.TSCorpusManager_1+configdir); |
|
75 |
initok = false; |
|
76 |
} |
|
77 |
} |
|
78 |
|
|
79 |
/** |
|
80 |
* Gets the registry path. |
|
81 |
* |
|
82 |
* @return the registry path |
|
83 |
*/ |
|
84 |
public String getRegistryPath() |
|
85 |
{ |
|
86 |
return registrydir.getAbsolutePath(); |
|
87 |
} |
|
88 |
|
|
89 |
/** |
|
90 |
* Gets the conf path. |
|
91 |
* |
|
92 |
* @return the conf path |
|
93 |
*/ |
|
94 |
public String getconfPath() |
|
95 |
{ |
|
96 |
return configdir.getAbsolutePath(); |
|
97 |
} |
|
98 |
|
|
99 |
/** |
|
100 |
* Checks if is initialized. |
|
101 |
* |
|
102 |
* @return true, if is initialized |
|
103 |
*/ |
|
104 |
public boolean isInitialized() { |
|
105 |
return initok; |
|
106 |
} |
|
107 |
|
|
108 |
/** |
|
109 |
* Gets the corpus. |
|
110 |
* |
|
111 |
* @param id the id |
|
112 |
* @return the corpus |
|
113 |
*/ |
|
114 |
public TSCorpus getCorpus(String id) |
|
115 |
{ |
|
116 |
if (!corpora.containsKey(id)) |
|
117 |
{ |
|
118 |
TSCorpus corpus = new TSCorpus(id, this); |
|
119 |
if (corpus.isOk()) |
|
120 |
corpora.put(id, corpus); |
|
121 |
} |
|
122 |
|
|
123 |
return corpora.get(id); |
|
124 |
} |
|
125 |
} |
|
0 | 126 |
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/package.html (revision 458) | ||
---|---|---|
1 |
<html> |
|
2 |
<body> |
|
3 |
<p>Implementation of syntax querying using the TIGERSearch engine. |
|
4 |
</body> |
|
5 |
</html> |
|
0 | 6 |
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TigerXmlIndexing.java (revision 458) | ||
---|---|---|
1 |
package org.txm.searchengine.ts; |
|
2 |
|
|
3 |
import ims.tiger.index.writer.IndexBuilderErrorHandler; |
|
4 |
import ims.tiger.index.writer.SimpleErrorHandler; |
|
5 |
import ims.tiger.index.writer.XMLIndexing; |
|
6 |
|
|
7 |
import java.io.File; |
|
8 |
import java.io.IOException; |
|
9 |
|
|
10 |
import org.apache.log4j.BasicConfigurator; |
|
11 |
import org.txm.core.messages.TXMCoreMessages; |
|
12 |
import org.xml.sax.SAXException; |
|
13 |
|
|
14 |
public class TigerXmlIndexing{ |
|
15 |
|
|
16 |
/* =================================================== */ |
|
17 |
/* MAIN */ |
|
18 |
/* =================================================== */ |
|
19 |
|
|
20 |
public static void main(String args[]) { |
|
21 |
|
|
22 |
BasicConfigurator.configure(); |
|
23 |
|
|
24 |
String userdir = System.getProperty("user.home"); //$NON-NLS-1$ |
|
25 |
File master = new File(userdir, "TXM/corpora/roland/tiger/master_pos.xml"); //$NON-NLS-1$ |
|
26 |
if (!master.exists()) System.out.println(TXMCoreMessages.TigerXmlIndexing_2); |
|
27 |
if (!master.canRead()) System.out.println(TXMCoreMessages.TigerXmlIndexing_3); |
|
28 |
|
|
29 |
String uri = master.getAbsolutePath(); |
|
30 |
File outdir = new File(userdir, "TXM/corpora/roland/tiger/data"); //$NON-NLS-1$ |
|
31 |
outdir.delete(); |
|
32 |
String dest = outdir.getAbsolutePath(); |
|
33 |
|
|
34 |
// String uri = "sources/tiger.xml"; |
|
35 |
// String dest = "/projekte/TIGER/java/testdir/work/TIGERCorpora/TIGER-250/"; |
|
36 |
|
|
37 |
try { |
|
38 |
IndexBuilderErrorHandler handler = new SimpleErrorHandler(dest); |
|
39 |
XMLIndexing indexing = new XMLIndexing("TEST",uri, dest, handler,false); //$NON-NLS-1$ |
|
40 |
indexing.startIndexing(); |
|
41 |
} |
|
42 |
catch (IOException e) { System.out.println(TXMCoreMessages.TigerXmlIndexing_6+e.getMessage()); org.txm.utils.logger.Log.printStackTrace(e); } |
|
43 |
catch (SAXException e) { System.out.println(TXMCoreMessages.TigerXmlIndexing_7+e.getMessage()); org.txm.utils.logger.Log.printStackTrace(e);} |
|
44 |
} |
|
45 |
} |
|
0 | 46 |
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSCorpus.java (revision 458) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate:$ |
|
25 |
// $LastChangedRevision:$ |
|
26 |
// $LastChangedBy:$ |
|
27 |
// |
|
28 |
package org.txm.searchengine.ts; |
|
29 |
|
|
30 |
import ims.tiger.corpus.Header; |
|
31 |
import ims.tiger.export.ExportManager; |
|
32 |
import ims.tiger.gui.tigergraphviewer.TIGERGraphViewerConfiguration; |
|
33 |
import ims.tiger.query.internalapi.InternalCorpusQueryManager; |
|
34 |
import ims.tiger.query.internalapi.InternalCorpusQueryManagerLocal; |
|
35 |
|
|
36 |
import java.util.HashMap; |
|
37 |
import java.util.List; |
|
38 |
|
|
39 |
import org.txm.core.messages.TXMCoreMessages; |
|
40 |
|
|
41 |
/** |
|
42 |
* The Class TSCorpus. |
|
43 |
*/ |
|
44 |
public class TSCorpus { |
|
45 |
|
|
46 |
/** The id. */ |
|
47 |
String id; |
|
48 |
|
|
49 |
/** The managers. */ |
|
50 |
public TSCorpusManager tsmanager; |
|
51 |
InternalCorpusQueryManager manager = null; |
|
52 |
ExportManager exporter; |
Formats disponibles : Unified diff