Révision 3230
tmp/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/CwbMakeAll.java (revision 3230) | ||
---|---|---|
35 | 35 |
import org.txm.libs.cqp.CQPLibPreferences; |
36 | 36 |
import org.txm.searchengine.cqp.clientExceptions.ServerNotFoundException; |
37 | 37 |
import org.txm.utils.OSDetector; |
38 |
import org.txm.utils.io.IOUtils; |
|
38 | 39 |
import org.txm.utils.logger.Log; |
39 | 40 |
|
40 | 41 |
// TODO: Auto-generated Javadoc |
... | ... | |
160 | 161 |
if (debug) { |
161 | 162 |
cmd.add(1, "-D"); //$NON-NLS-1$ |
162 | 163 |
} |
163 |
return run(cmd, monitorOutput, true); // always wait for cwb-makeall to end |
|
164 |
boolean ret = run(cmd, monitorOutput, true); // always wait for cwb-makeall to end |
|
165 |
|
|
166 |
// if (ret) { |
|
167 |
// File regFile = new File(pathToRegistry); |
|
168 |
// ReadRegistryFile registry = new ReadRegistryFile(regFile); |
|
169 |
// registry.read(); |
|
170 |
// |
|
171 |
// File dataDirectory = registry.getDataDirectory(); |
|
172 |
// |
|
173 |
// for (String struct : registry.getSAttributesMap().keySet()) { |
|
174 |
// for (String attr : registry.getSAttributesMap().get(struct)) { |
|
175 |
// File avxFile = new File(dataDirectory, attr+".avx"); |
|
176 |
// File avsFile = new File(dataDirectory, attr+".avs"); |
|
177 |
// File rngFile = new File(dataDirectory, attr+".rng"); |
|
178 |
// File cpos2idFile = new File(dataDirectory, attr+".corpus.idx"); |
|
179 |
// File id2StrFile = new File(dataDirectory, attr+".lexicon.idx"); |
|
180 |
// |
|
181 |
// if (avsFile.exists()) { |
|
182 |
// System.out.println("Building id -> cpos: "+id2StrFile); |
|
183 |
// ArrayList<String> strings = new ArrayList<String>(); |
|
184 |
// String dataString = IOUtils.getText(avsFile); |
|
185 |
// |
|
186 |
// System.out.println("Building cpos -> id: "+cpos2idFile); |
|
187 |
// } |
|
188 |
// } |
|
189 |
// } |
|
190 |
// |
|
191 |
// return ret; |
|
192 |
// } else { |
|
193 |
// return false; |
|
194 |
// } |
|
195 |
return ret; |
|
164 | 196 |
} |
165 | 197 |
|
166 | 198 |
/** |
tmp/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/ReadRegistryFile.java (revision 3230) | ||
---|---|---|
20 | 20 |
*/ |
21 | 21 |
public class ReadRegistryFile { |
22 | 22 |
|
23 |
File registryFile; |
|
23 |
File registryFile, dataDirectory;
|
|
24 | 24 |
|
25 |
String id; |
|
26 |
|
|
25 | 27 |
ArrayList<String> pAttributes; |
26 | 28 |
|
27 | 29 |
ArrayList<String> sAttributes; |
... | ... | |
47 | 49 |
for (String line : IOUtils.getLines(registryFile, System.getProperty("file.encoding"))) { |
48 | 50 |
line = line.trim(); // remove first tab |
49 | 51 |
|
50 |
if (line.startsWith("ATTRIBUTE ")) { |
|
52 |
if (line.startsWith("HOME ")) { |
|
53 |
line = line.substring(5); // remove 'HOME ' |
|
54 |
if (line.startsWith("\"") && line.endsWith("\"")) { |
|
55 |
line = line.substring(1, line.length() - 1); |
|
56 |
} |
|
57 |
dataDirectory = new File(line); |
|
58 |
} else if (line.startsWith("ID ")) { |
|
59 |
line = line.substring(3); // remove 'ID ' |
|
60 |
id = line; |
|
61 |
} else if (line.startsWith("ATTRIBUTE ")) { |
|
51 | 62 |
line = line.substring(10); // remove 'ATTRIBUTE ' |
52 | 63 |
pAttributes.add(line); |
53 | 64 |
} |
... | ... | |
90 | 101 |
} |
91 | 102 |
|
92 | 103 |
/** |
104 |
* |
|
105 |
* @return the value of the ID field |
|
106 |
*/ |
|
107 |
public String getID() { |
|
108 |
return id; |
|
109 |
} |
|
110 |
|
|
111 |
/** |
|
112 |
* |
|
113 |
* @return the data directory pointed to by the value of the HOME field |
|
114 |
*/ |
|
115 |
public File getDataDirectory() { |
|
116 |
return dataDirectory; |
|
117 |
} |
|
118 |
|
|
119 |
/** |
|
93 | 120 |
* Test the CQP index files of each p-attribute and s-attribute properties |
94 | 121 |
* |
95 | 122 |
* @param dataDirectory the directory where the binary files should be found |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/core/preferences/SubcorpusPreferences.java (revision 3230) | ||
---|---|---|
9 | 9 |
|
10 | 10 |
public static String OPTIMIZED_MODE = "optimized_mode"; |
11 | 11 |
|
12 |
public static final String TARGET_STRATEGY = "target_strategy"; |
|
13 |
|
|
12 | 14 |
public SubcorpusPreferences() { |
13 | 15 |
// TODO Auto-generated constructor stub |
14 | 16 |
} |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/core/functions/summary/Summary.java (revision 3230) | ||
---|---|---|
75 | 75 |
|
76 | 76 |
QueryResult result = corpus.query(new CQLQuery("<" + properties.get(j).getFullName() + ">[]"), "summary", false); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
77 | 77 |
int[] starts = result.getStarts(); |
78 |
result.drop(); |
|
78 |
//result.drop();
|
|
79 | 79 |
result = corpus.query(new CQLQuery("[]</" + properties.get(j).getFullName() + ">"), "summary", false); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
80 | 80 |
int[] ends = result.getStarts(); |
81 |
result.drop(); |
|
81 |
//result.drop();
|
|
82 | 82 |
// System.out.println("starts: "+Arrays.toString(starts)); |
83 | 83 |
// System.out.println("ends: "+Arrays.toString(ends)); |
84 | 84 |
int[] strcutpos = CQPSearchEngine.getCqiClient().cpos2Struc(properties.get(j).getQualifiedName(), starts); |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/StructuralUnitProperty.java (revision 3230) | ||
---|---|---|
337 | 337 |
List<String> values = getValues(); |
338 | 338 |
return values.toArray(new String[values.size()]); |
339 | 339 |
} |
340 |
|
|
341 |
@Override |
|
342 |
public int[] cpos2Id(int[] positions) throws Exception { |
|
343 |
|
|
344 |
// TODO Auto-generated method stub |
|
345 |
return null; |
|
346 |
} |
|
347 |
|
|
348 |
@Override |
|
349 |
public String[] id2Str(int[] positions) throws Exception { |
|
350 |
|
|
351 |
// TODO Auto-generated method stub |
|
352 |
return null; |
|
353 |
} |
|
340 | 354 |
} |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/CQPCorpus.java (revision 3230) | ||
---|---|---|
27 | 27 |
// |
28 | 28 |
package org.txm.searchengine.cqp.corpus; |
29 | 29 |
|
30 |
import java.io.File; |
|
30 | 31 |
import java.io.IOException; |
31 | 32 |
import java.util.ArrayList; |
32 | 33 |
import java.util.Collection; |
... | ... | |
546 | 547 |
return null; |
547 | 548 |
} |
548 | 549 |
|
549 |
CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), subcorpusCqpId, CQLQuery.fixQuery(query.getQueryString())); |
|
550 |
//CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), subcorpusCqpId, CQLQuery.fixQuery(query.getQueryString()));
|
|
550 | 551 |
// System.out.println("SUBCORPUS: "+subcorpusCqpId+" |
551 | 552 |
// q="+query.getQueryString()); |
552 | 553 |
subcorpus = new Subcorpus(this); |
... | ... | |
607 | 608 |
* @param elem the elem |
608 | 609 |
* @return the subcorpus |
609 | 610 |
* @throws CqiClientException the cqi client exception |
611 |
* @throws InterruptedException |
|
610 | 612 |
*/ |
611 |
public Subcorpus createSubcorpus(String name, SelectionResult selectionResult) throws CqiClientException { |
|
613 |
public Subcorpus createSubcorpus(String name, SelectionResult selectionResult) throws CqiClientException, InterruptedException {
|
|
612 | 614 |
Subcorpus sub = createSubcorpus(this.getStructuralUnit("text"), this.getStructuralUnit("text").getProperty("id"), selectionResult, name); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
613 | 615 |
sub.setSelectionResult(selectionResult); |
616 |
sub.compute(); |
|
614 | 617 |
return sub; |
615 | 618 |
} |
616 | 619 |
|
... | ... | |
1432 | 1435 |
QueryResult queryResult = null; |
1433 | 1436 |
// String queryResultId = queryResultNamePrefix + UUID.randomUUID().toString(); |
1434 | 1437 |
String queryResultId = queryResultNamePrefix + getNextQueryCounter(); |
1435 |
String fixedQuery = CQLQuery.fixQuery(query.getQueryString()); |
|
1436 | 1438 |
|
1437 |
if (fixedQuery.length() > CQLQuery.MAX_CQL_LENGTH) { |
|
1439 |
|
|
1440 |
if (query.getQueryString().length() > CQLQuery.MAX_CQL_LENGTH) { |
|
1438 | 1441 |
try { |
1439 |
Log.warning(NLS.bind("Error: CQL length limit reached ({0}) with {1} length.", CQLQuery.MAX_CQL_LENGTH, fixedQuery.length()));
|
|
1442 |
Log.warning(NLS.bind("Error: CQL length limit reached ({0}) with {1} length.", CQLQuery.MAX_CQL_LENGTH, query.getQueryString().length()));
|
|
1440 | 1443 |
return new FakeQueryResult(queryResultName, this, query, new int[0], new int[0], new int[0]); |
1441 | 1444 |
} |
1442 | 1445 |
catch (InvalidCqpIdException e) { |
... | ... | |
1445 | 1448 |
} |
1446 | 1449 |
} |
1447 | 1450 |
|
1448 |
Log.finest(NLS.bind(TXMCoreMessages.queryOnP0ColonP1InfP2, new String[] { this.getQualifiedCqpId(), queryResultId, fixedQuery })); // $NON-NLS-1$
|
|
1451 |
Log.finest(NLS.bind(TXMCoreMessages.queryOnP0ColonP1InfP2, new String[] { this.getQualifiedCqpId(), queryResultId, query.getQueryString() })); // $NON-NLS-1$
|
|
1449 | 1452 |
// long start = System.currentTimeMillis(); |
1450 | 1453 |
try { |
1451 |
CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), queryResultId, fixedQuery); |
|
1454 |
//CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), queryResultId, fixedQuery); |
|
1455 |
|
|
1456 |
String q = query.getQueryString(); |
|
1457 |
if (q.matches("undump \"[^ ]+\";")) { // undump query |
|
1458 |
String path = q.substring(8, q.length() - 2); |
|
1459 |
if (!new File(path).exists()) { |
|
1460 |
Log.severe(NLS.bind("Error: could not compute subcorpus from the dump file: {0}.", path)); |
|
1461 |
return null; |
|
1462 |
} |
|
1463 |
CorpusManager.getCorpusManager().getCqiClient().query(";"); |
|
1464 |
CorpusManager.getCorpusManager().getCqiClient().query("undump "+queryResultId+" < "+q.substring(7)); |
|
1465 |
} else if (q.matches("(union|join|intersect|inter|difference|diff) [^ ]+ [^ ]+;")) { |
|
1466 |
CorpusManager.getCorpusManager().getCqiClient().query(";"); |
|
1467 |
CorpusManager.getCorpusManager().getCqiClient().query(queryResultId+" = "+q); |
|
1468 |
} else { |
|
1469 |
//String fixedQuery = CQLQuery.fixQuery(query.getQueryString()); |
|
1470 |
CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), queryResultId, CQLQuery.fixQuery(query.getQueryString())); |
|
1471 |
} |
|
1472 |
|
|
1452 | 1473 |
queryResult = new QueryResult(queryResultId, queryResultName, this, query); |
1453 | 1474 |
|
1454 | 1475 |
// if (save) { |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/Property.java (revision 3230) | ||
---|---|---|
33 | 33 |
import org.apache.commons.lang.StringUtils; |
34 | 34 |
import org.txm.core.messages.TXMCoreMessages; |
35 | 35 |
import org.txm.core.preferences.TXMPreferences; |
36 |
import org.txm.searchengine.core.SearchEngine; |
|
36 | 37 |
import org.txm.searchengine.core.SearchEngineProperty; |
38 |
import org.txm.searchengine.core.SearchEnginesManager; |
|
37 | 39 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
38 | 40 |
import org.txm.utils.logger.Log; |
39 | 41 |
|
... | ... | |
45 | 47 |
* |
46 | 48 |
* @author Jean-Philippe Magué, mdecorde |
47 | 49 |
*/ |
48 |
public abstract class Property implements Comparable<Property>, SearchEngineProperty { |
|
49 |
|
|
50 |
/** The name. */ |
|
51 |
protected String name; |
|
52 |
|
|
50 |
public abstract class Property extends org.txm.searchengine.core.Property implements Comparable<Property>, SearchEngineProperty { |
|
51 |
|
|
53 | 52 |
/** The corpus. */ |
54 | 53 |
protected CQPCorpus corpus; |
55 | 54 |
|
... | ... | |
64 | 63 |
* the corpus |
65 | 64 |
*/ |
66 | 65 |
protected Property(String name, CQPCorpus corpus) { |
67 |
this.name = name;
|
|
66 |
super(name);
|
|
68 | 67 |
this.corpus = corpus; |
69 | 68 |
} |
70 | 69 |
|
70 |
public SearchEngine getSearchEngine() { |
|
71 |
return SearchEnginesManager.getCQPSearchEngine(); |
|
72 |
} |
|
73 |
|
|
71 | 74 |
/** |
72 | 75 |
* Gets the qualified name = corpus name + "." + this.getFullName() . |
73 | 76 |
* |
... | ... | |
84 | 87 |
@Override |
85 | 88 |
public abstract String getFullName(); |
86 | 89 |
|
87 |
/** |
|
88 |
* Gets the name. |
|
89 |
* |
|
90 |
* @return the name |
|
91 |
*/ |
|
92 |
@Override |
|
93 |
public String getName() { |
|
94 |
return name; |
|
95 |
} |
|
96 | 90 |
|
97 | 91 |
/** |
98 | 92 |
* Gets the corpus over which the property is defined. |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/Subcorpus.java (revision 3230) | ||
---|---|---|
80 | 80 |
* CQP text ids cache |
81 | 81 |
*/ |
82 | 82 |
protected String[] cqpTextIDS = null; |
83 |
|
|
84 |
|
|
83 | 85 |
|
84 | 86 |
/** |
85 | 87 |
* |
... | ... | |
130 | 132 |
Log.warning(NLS.bind("Error: CQL length limit reached ({0}) with {1} length.", CQLQuery.MAX_CQL_LENGTH, pQuery.getQueryString().length())); |
131 | 133 |
return false; |
132 | 134 |
} |
133 |
CorpusManager.getCorpusManager().getCqiClient().cqpQuery(parent_id, this.pID, CQLQuery.fixQuery(this.pQuery.getQueryString())); |
|
134 | 135 |
|
136 |
String q = this.pQuery.getQueryString(); |
|
137 |
if (q.matches("undump [^ ]+;")) { // undump query |
|
138 |
String path = q.substring(8, q.length() - 2); |
|
139 |
if (!new File(path).exists()) { |
|
140 |
Log.severe(NLS.bind("Error: could not compute subcorpus from the dump file: {0}.", path)); |
|
141 |
return false; |
|
142 |
} |
|
143 |
CorpusManager.getCorpusManager().getCqiClient().query(";"); |
|
144 |
CorpusManager.getCorpusManager().getCqiClient().query("undump "+this.pID+" < "+q.substring(7)); |
|
145 |
} else if (q.matches("(union|join|intersect|inter|difference|diff) [^ ]+ [^ ]+;")) { |
|
146 |
CorpusManager.getCorpusManager().getCqiClient().query(";"); |
|
147 |
CorpusManager.getCorpusManager().getCqiClient().query(this.pID+" = "+q); |
|
148 |
} else { |
|
149 |
CorpusManager.getCorpusManager().getCqiClient().cqpQuery(parent_id, this.pID, CQLQuery.fixQuery(q)); |
|
150 |
// if (q.contains("@[") || q.contains("@\"")) { |
|
151 |
// Log.fine("Target detected in "+q+", recentering on target."); |
|
152 |
// CorpusManager.getCorpusManager().getCqiClient().cqpQuery(this.getQualifiedCqpId(), this.pID, "<target>[]"); |
|
153 |
// } |
|
154 |
} |
|
155 |
|
|
135 | 156 |
this.qresult = new QueryResult(this.pID, this.getUserName(), this.getCorpusParent(), this.pQuery); // getCorpusParent().query(pQuery, this.pID, true); |
136 | 157 |
} |
137 | 158 |
|
Formats disponibles : Unified diff