Révision 1141
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/KRAnnotationEngine.java (revision 1141) | ||
---|---|---|
7 | 7 |
import java.util.List; |
8 | 8 |
|
9 | 9 |
import org.eclipse.core.runtime.IProgressMonitor; |
10 |
import org.eclipse.core.runtime.preferences.IEclipsePreferences; |
|
10 | 11 |
import org.osgi.service.prefs.BackingStoreException; |
12 |
import org.osgi.service.prefs.Preferences; |
|
11 | 13 |
import org.txm.annotation.core.AnnotationEngine; |
12 | 14 |
import org.txm.annotation.kr.core.repository.KnowledgeRepository; |
13 | 15 |
import org.txm.annotation.kr.core.repository.KnowledgeRepositoryManager; |
... | ... | |
24 | 26 |
public class KRAnnotationEngine extends AnnotationEngine { |
25 | 27 |
|
26 | 28 |
static HashMap<MainCorpus, AnnotationManager> ams = new HashMap<MainCorpus, AnnotationManager>(); |
29 |
|
|
27 | 30 |
public static String KNOWLEDGE_ACCESS = "access"; |
28 | 31 |
public static String KNOWLEDGE_STRINGS = "strings"; |
29 | 32 |
public static String KNOWLEDGE_TYPEQUERIES = "queries"; |
30 | 33 |
public static String KNOWLEDGE_TYPES = "fields"; |
31 |
|
|
34 |
|
|
32 | 35 |
public static String KNOWLEDGE_TYPEWEBACCESS = "typeaccess"; |
33 | 36 |
|
34 | 37 |
public static boolean canBeAnnotated(MainCorpus corpus) { |
... | ... | |
49 | 52 |
public static AnnotationManager getAnnotationManager(MainCorpus corpus) throws Exception { |
50 | 53 |
AnnotationManager am = ams.get(corpus); |
51 | 54 |
if (am == null) { |
52 |
if (getKnowledgeRepositoryNames(corpus).size() > 0) { |
|
53 |
am = new AnnotationManager(corpus); |
|
54 |
am.initialize(); |
|
55 |
ams.put(corpus, am); |
|
56 |
} |
|
57 |
|
|
55 |
am = new AnnotationManager(corpus); |
|
56 |
am.initialize(); |
|
57 |
ams.put(corpus, am); |
|
58 | 58 |
} |
59 | 59 |
return am; |
60 | 60 |
} |
61 |
|
|
62 |
protected static Element getKnowledgeRepositoriesElement(Element corpusElement) { |
|
63 |
NodeList textsList = corpusElement.getElementsByTagName("knowledgeRepositories"); |
|
64 |
if (textsList.getLength() == 0) { |
|
65 |
Element knowledgeRepositories = corpusElement.getOwnerDocument().createElement("knowledgeRepositories"); |
|
66 |
corpusElement.appendChild(knowledgeRepositories); |
|
67 |
return knowledgeRepositories; |
|
68 |
} else { |
|
69 |
return (Element) textsList.item(0); |
|
70 |
} |
|
71 |
} |
|
72 | 61 |
|
62 |
// protected static Element getKnowledgeRepositoriesElement(Element corpusElement) { |
|
63 |
// NodeList textsList = corpusElement.getElementsByTagName("knowledgeRepositories"); |
|
64 |
// if (textsList.getLength() == 0) { |
|
65 |
// Element knowledgeRepositories = corpusElement.getOwnerDocument().createElement("knowledgeRepositories"); |
|
66 |
// corpusElement.appendChild(knowledgeRepositories); |
|
67 |
// return knowledgeRepositories; |
|
68 |
// } else { |
|
69 |
// return (Element) textsList.item(0); |
|
70 |
// } |
|
71 |
// } |
|
72 |
|
|
73 | 73 |
/** |
74 | 74 |
* |
75 | 75 |
* Lazy load KR |
76 | 76 |
* |
77 | 77 |
* @param name KnowledgeRepository's name |
78 | 78 |
* @return may return null if the KnowledgeRepository does not exist |
79 |
* @throws BackingStoreException |
|
79 | 80 |
*/ |
80 |
public static KnowledgeRepository getKnowledgeRepository(CQPCorpus corpus, String name) { |
|
81 |
public static KnowledgeRepository getKnowledgeRepository(CQPCorpus corpus, String name) throws BackingStoreException {
|
|
81 | 82 |
return KnowledgeRepositoryManager.getKnowledgeRepository(name, corpus.getMainCorpus()); |
82 | 83 |
} |
83 | 84 |
|
... | ... | |
114 | 115 |
//<field type="xxx">yyy</field> |
115 | 116 |
Element typeElement = (Element)typesList.item(i); |
116 | 117 |
String type_id = ""+typeElement.getAttribute(SQLKnowledgeRepository.TYPE_ID); |
117 |
|
|
118 |
|
|
118 | 119 |
String type_name = typeElement.getAttribute(SQLKnowledgeRepository.NAME); |
119 | 120 |
if (type_name == null || type_name.length() == 0) type_name = type_id; |
120 |
|
|
121 |
|
|
121 | 122 |
String type_url = ""+typeElement.getAttribute(SQLKnowledgeRepository.TYPE_URL); |
122 |
|
|
123 |
|
|
123 | 124 |
String type_size = typeElement.getAttribute(SQLKnowledgeRepository.TYPE_SIZE); |
124 | 125 |
if (type_size == null || type_size.length() == 0) type_size = "SMALL"; // show all by default |
125 | 126 |
type_size = type_size.toUpperCase(); |
126 |
|
|
127 |
|
|
127 | 128 |
String type_effect = typeElement.getAttribute(SQLKnowledgeRepository.TYPE_EFFECT); |
128 | 129 |
if (type_effect == null || type_effect.length() == 0) type_effect = "SEGMENT"; // segment annotation by default |
129 | 130 |
type_effect = type_effect.toUpperCase(); |
... | ... | |
141 | 142 |
hashFields.put(SQLKnowledgeRepository.TYPE_EFFECT, type_effect); |
142 | 143 |
fields.put(type_id, hashFields); |
143 | 144 |
} |
144 |
|
|
145 |
|
|
145 | 146 |
NodeList stringsList = e.getElementsByTagName("strings"); |
146 | 147 |
for (int i = 0 ; i < stringsList.getLength() ; i++) { |
147 | 148 |
Element stringsElement = (Element)stringsList.item(i); |
148 | 149 |
String lang = stringsElement.getAttribute("lang"); |
149 | 150 |
if (lang == null) lang = "en"; // default lang is "en" |
150 |
|
|
151 |
|
|
151 | 152 |
HashMap<String, String> values = new HashMap<String, String>(); |
152 | 153 |
strings.put(lang, values); |
153 |
|
|
154 |
|
|
154 | 155 |
NodeList stringList = stringsElement.getElementsByTagName("string"); |
155 | 156 |
for (int j = 0 ; j < stringList.getLength() ; j++) { |
156 | 157 |
Element stringElement = (Element)stringList.item(j); |
... | ... | |
164 | 165 |
repConfiguration.put(KNOWLEDGE_ACCESS, access); |
165 | 166 |
repConfiguration.put(KNOWLEDGE_TYPES, fields); |
166 | 167 |
repConfiguration.put(KNOWLEDGE_STRINGS, strings); |
167 |
|
|
168 |
|
|
168 | 169 |
return repConfiguration; |
169 | 170 |
} |
170 |
|
|
171 |
public static Element getKnowledgeRepositoryElement(BaseOldParameters params, String name) { |
|
172 |
Element rElement = getKnowledgeRepositoriesElement(params.getCorpusElement()); |
|
173 |
NodeList repositoriesList = rElement.getElementsByTagName("repository"); |
|
174 |
for (int i = 0 ; i < repositoriesList.getLength() ; i++) { |
|
175 |
Element e = ((Element)repositoriesList.item(i)); |
|
176 |
if (name.equals(e.getAttribute("name"))) { |
|
177 |
return e; |
|
178 |
} |
|
179 |
} |
|
180 |
|
|
181 |
return null; |
|
182 |
} |
|
183 | 171 |
|
184 |
|
|
185 | 172 |
/** |
186 | 173 |
* Utility method to get a knowledge repository configuration |
187 | 174 |
* |
188 |
* @param params |
|
189 |
* @return the repository names |
|
175 |
* @param name the repository name |
|
176 |
* @return the KR configuration map |
|
177 |
* @throws BackingStoreException |
|
190 | 178 |
*/ |
191 |
public static List<String> getKnowledgeRepositoryNames(BaseOldParameters params) { |
|
192 |
ArrayList<String> names = new ArrayList<String>(); |
|
193 |
Element corpusElement = params.getCorpusElement(); |
|
194 |
if (corpusElement == null) return names; |
|
195 |
|
|
196 |
Element rElement = getKnowledgeRepositoriesElement(corpusElement); |
|
197 |
if (rElement == null) return names; |
|
198 |
|
|
199 |
NodeList repositoriesList = rElement.getElementsByTagName("repository"); |
|
200 |
for (int i = 0 ; i < repositoriesList.getLength() ; i++) { |
|
201 |
names.add(((Element)repositoriesList.item(i)).getAttribute("name")); |
|
179 |
public static HashMap<String, HashMap<String, ?>> getKnowledgeRepositoryConfiguration(String name, Preferences preference) throws BackingStoreException { |
|
180 |
HashMap<String, HashMap<String, ?>> repConfiguration = new HashMap<String, HashMap<String, ?>>(); |
|
181 |
|
|
182 |
HashMap<String, String> access = new HashMap<String, String>(); |
|
183 |
HashMap<String, HashMap<String, String>> strings = new HashMap<String, HashMap<String, String>>(); |
|
184 |
HashMap<String, HashMap<String, String>> fields = new HashMap<String, HashMap<String, String>>(); |
|
185 |
|
|
186 |
if (preference == null) return null; |
|
187 |
|
|
188 |
access.put(SQLKnowledgeRepository.NAME, preference.get(SQLKnowledgeRepository.NAME, "default")); |
|
189 |
access.put("version", preference.get("version","0")); |
|
190 |
access.put("mode", preference.get("mode","")); |
|
191 |
access.put(SQLKnowledgeRepository.TYPE_URL, preference.get(SQLKnowledgeRepository.TYPE_URL, "")); |
|
192 |
access.put(SQLKnowledgeRepository.TYPE_RESURL, preference.get(SQLKnowledgeRepository.TYPE_RESURL, "")); |
|
193 |
access.put(SQLKnowledgeRepository.TYPE_TYPEURL, preference.get(SQLKnowledgeRepository.TYPE_TYPEURL, "")); |
|
194 |
access.put(SQLConnection.SQL_ADDRESS, preference.get(SQLConnection.SQL_ADDRESS, "")); |
|
195 |
access.put(SQLConnection.SQL_DRIVER, preference.get(SQLConnection.SQL_DRIVER,"")); |
|
196 |
access.put(SQLConnection.SQL_PORT, preference.get(SQLConnection.SQL_PORT,"")); |
|
197 |
access.put(SQLConnection.SQL_USER, preference.get(SQLConnection.SQL_USER,"")); |
|
198 |
access.put(SQLConnection.SQL_PASSWORD, preference.get(SQLConnection.SQL_PASSWORD,"")); |
|
199 |
|
|
200 |
//<type id="" name> ... </type> |
|
201 |
Preferences typesList = preference.node("type"); |
|
202 |
for (String type_id : typesList.childrenNames()) { |
|
203 |
//<field type="xxx">yyy</field> |
|
204 |
// Element typeElement = (Element)typesList.item(i); |
|
205 |
// String type_id = ""+typeElement.getAttribute(SQLKnowledgeRepository.TYPE_ID); |
|
206 |
|
|
207 |
Preferences typeElement = typesList.node(type_id); |
|
208 |
|
|
209 |
String type_name = typeElement.get(SQLKnowledgeRepository.NAME, type_id); |
|
210 |
|
|
211 |
String type_url = ""+typeElement.get(SQLKnowledgeRepository.TYPE_URL, ""); |
|
212 |
|
|
213 |
String type_size = typeElement.get(SQLKnowledgeRepository.TYPE_SIZE, "SMALL"); |
|
214 |
type_size = type_size.toUpperCase(); |
|
215 |
|
|
216 |
String type_effect = typeElement.get(SQLKnowledgeRepository.TYPE_EFFECT, "SEGMENT"); |
|
217 |
type_effect = type_effect.toUpperCase(); |
|
218 |
|
|
219 |
HashMap<String, String> hashFields = new HashMap<String, String>(); |
|
220 |
Preferences fieldsList = typeElement.node("field"); // contains KR type specific properties |
|
221 |
for (String field_type : fieldsList.keys()) { |
|
222 |
hashFields.put(field_type, fieldsList.get(field_type, "")); |
|
223 |
} |
|
224 |
|
|
225 |
hashFields.put(SQLKnowledgeRepository.TYPE_URL, type_url); |
|
226 |
hashFields.put(SQLKnowledgeRepository.NAME, type_name); |
|
227 |
hashFields.put(SQLKnowledgeRepository.TYPE_SIZE, type_size); |
|
228 |
hashFields.put(SQLKnowledgeRepository.TYPE_EFFECT, type_effect); |
|
229 |
fields.put(type_id, hashFields); |
|
202 | 230 |
} |
203 |
|
|
204 |
if (names.size() == 0) names.add(params.getCorpusName()); // add default KR = properties & structure properties |
|
205 |
return names; |
|
231 |
|
|
232 |
Preferences stringsList = preference.node("strings"); |
|
233 |
for (String lang : stringsList.childrenNames()) { |
|
234 |
Preferences stringsElement = stringsList.node(lang); |
|
235 |
// String lang = stringsElement.getAttribute("lang"); |
|
236 |
// if (lang == null) lang = "en"; // default lang is "en" |
|
237 |
|
|
238 |
HashMap<String, String> values = new HashMap<String, String>(); |
|
239 |
strings.put(lang, values); |
|
240 |
|
|
241 |
for (String key : stringsElement.keys()) { |
|
242 |
values.put(key, stringsElement.get(key, "")); |
|
243 |
} |
|
244 |
} |
|
245 |
|
|
246 |
repConfiguration.put(KNOWLEDGE_ACCESS, access); |
|
247 |
repConfiguration.put(KNOWLEDGE_TYPES, fields); |
|
248 |
repConfiguration.put(KNOWLEDGE_STRINGS, strings); |
|
249 |
|
|
250 |
return repConfiguration; |
|
206 | 251 |
} |
207 |
|
|
252 |
|
|
253 |
// public static Element getKnowledgeRepositoryElement(BaseOldParameters params, String name) { |
|
254 |
// Element rElement = getKnowledgeRepositoriesElement(params.getCorpusElement()); |
|
255 |
// NodeList repositoriesList = rElement.getElementsByTagName("repository"); |
|
256 |
// for (int i = 0 ; i < repositoriesList.getLength() ; i++) { |
|
257 |
// Element e = ((Element)repositoriesList.item(i)); |
|
258 |
// if (name.equals(e.getAttribute("name"))) { |
|
259 |
// return e; |
|
260 |
// } |
|
261 |
// } |
|
262 |
// |
|
263 |
// return null; |
|
264 |
// } |
|
265 |
|
|
266 |
|
|
267 |
// /** |
|
268 |
// * Utility method to get a knowledge repository configuration |
|
269 |
// * |
|
270 |
// * @param params |
|
271 |
// * @return the repository names |
|
272 |
// */ |
|
273 |
// public static List<String> getKnowledgeRepositoryNames(BaseOldParameters params) { |
|
274 |
// ArrayList<String> names = new ArrayList<String>(); |
|
275 |
// Element corpusElement = params.getCorpusElement(); |
|
276 |
// if (corpusElement == null) return names; |
|
277 |
// |
|
278 |
// Element rElement = getKnowledgeRepositoriesElement(corpusElement); |
|
279 |
// if (rElement == null) return names; |
|
280 |
// |
|
281 |
// NodeList repositoriesList = rElement.getElementsByTagName("repository"); |
|
282 |
// for (int i = 0 ; i < repositoriesList.getLength() ; i++) { |
|
283 |
// names.add(((Element)repositoriesList.item(i)).getAttribute("name")); |
|
284 |
// } |
|
285 |
// |
|
286 |
// if (names.size() == 0) names.add(params.getCorpusName()); // add default KR = properties & structure properties |
|
287 |
// return names; |
|
288 |
// } |
|
289 |
|
|
208 | 290 |
/** |
209 | 291 |
* |
210 | 292 |
* @param corpus the corpus |
211 |
* @return the repository names |
|
293 |
* @return the repository names the corpus is using
|
|
212 | 294 |
*/ |
213 | 295 |
public static List<String> getKnowledgeRepositoryNames(CQPCorpus corpus) { |
214 | 296 |
if (corpus == null) return new ArrayList<String>(); |
... | ... | |
216 | 298 |
if (base == null) return new ArrayList<String>(); |
217 | 299 |
String[] names; |
218 | 300 |
try { |
219 |
names = base.getPreferencesScope().getNode("KnowledgeRepository").childrenNames(); |
|
301 |
IEclipsePreferences node = base.getPreferencesScope().getNode("KnowledgeRepository"); |
|
302 |
if (!node.nodeExists("DEFAULT")) { |
|
303 |
// create the "DEFAULT" KR configuration |
|
304 |
Preferences krconf = node.node("DEFAULT"); |
|
305 |
krconf.put("name", "DEFAULT"); |
|
306 |
// |
|
307 |
} |
|
308 |
names = node.childrenNames(); |
|
220 | 309 |
return Arrays.asList(names); // getKnowledgeRepositoryNames(params); |
221 | 310 |
} catch (BackingStoreException e) { |
222 | 311 |
e.printStackTrace(); |
223 | 312 |
return new ArrayList<>(); |
224 | 313 |
} |
225 | 314 |
} |
226 |
|
|
315 |
|
|
227 | 316 |
/** |
228 | 317 |
* |
229 | 318 |
* @param corpus |
... | ... | |
236 | 325 |
else |
237 | 326 |
return false; |
238 | 327 |
} |
239 |
|
|
328 |
|
|
240 | 329 |
public void closeAnnotationManager(MainCorpus corpus) { |
241 | 330 |
AnnotationManager am = ams.get(corpus); |
242 | 331 |
if (am != null) { |
... | ... | |
244 | 333 |
am.closeAll(); |
245 | 334 |
} |
246 | 335 |
} |
247 |
|
|
248 |
public static Element createKnowledgeRepositoryElement(BaseOldParameters params, String name) { |
|
249 |
Element rElement = getKnowledgeRepositoriesElement(params.getCorpusElement()); |
|
250 |
Element e = rElement.getOwnerDocument().createElement("repository"); |
|
251 |
e.setAttribute("name", name); |
|
252 |
e.setAttribute("mode", "file"); |
|
253 |
e.setAttribute("user", "false"); |
|
254 |
e.setAttribute("password", "false"); |
|
255 |
e.setAttribute("version", "0"); |
|
256 |
|
|
257 |
rElement.appendChild(e); |
|
258 |
return e; |
|
259 |
} |
|
260 |
|
|
336 |
|
|
337 |
// public static Element createKnowledgeRepositoryElement(BaseOldParameters params, String name) {
|
|
338 |
// Element rElement = getKnowledgeRepositoriesElement(params.getCorpusElement());
|
|
339 |
// Element e = rElement.getOwnerDocument().createElement("repository");
|
|
340 |
// e.setAttribute("name", name);
|
|
341 |
// e.setAttribute("mode", "file");
|
|
342 |
// e.setAttribute("user", "false");
|
|
343 |
// e.setAttribute("password", "false");
|
|
344 |
// e.setAttribute("version", "0");
|
|
345 |
// |
|
346 |
// rElement.appendChild(e);
|
|
347 |
// return e;
|
|
348 |
// }
|
|
349 |
|
|
261 | 350 |
@Override |
262 | 351 |
public String getName() { |
263 | 352 |
return "KR Annotation engine"; |
... | ... | |
280 | 369 |
|
281 | 370 |
@Override |
282 | 371 |
public boolean start(IProgressMonitor monitor) { |
372 |
ams = new HashMap<MainCorpus, AnnotationManager>(); |
|
283 | 373 |
return true; |
284 | 374 |
} |
285 | 375 |
|
... | ... | |
306 | 396 |
success = false; |
307 | 397 |
} |
308 | 398 |
} |
309 |
|
|
399 |
ams.clear(); |
|
310 | 400 |
return success; |
311 | 401 |
} |
312 | 402 |
|
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/repository/KnowledgeRepositoryManager.java (revision 1141) | ||
---|---|---|
8 | 8 |
import javax.persistence.EntityManager; |
9 | 9 |
|
10 | 10 |
import org.apache.commons.io.FileUtils; |
11 |
import org.osgi.service.prefs.BackingStoreException; |
|
12 |
import org.osgi.service.prefs.Preferences; |
|
11 | 13 |
import org.txm.Toolbox; |
12 | 14 |
import org.txm.annotation.kr.core.DatabasePersistenceManager; |
13 | 15 |
import org.txm.annotation.kr.core.KRAnnotationEngine; |
14 | 16 |
import org.txm.core.preferences.TBXPreferences; |
15 | 17 |
import org.txm.objects.BaseOldParameters; |
18 |
import org.txm.objects.Project; |
|
16 | 19 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
17 | 20 |
import org.txm.sql.SQLConnection; |
18 | 21 |
import org.txm.utils.logger.Log; |
... | ... | |
44 | 47 |
HashMap<String, HashMap<String, ?>> conf = KRAnnotationEngine.getKnowledgeRepositoryConfiguration(name, element); |
45 | 48 |
return createKnowledgeRepository(name, conf); |
46 | 49 |
} |
50 |
|
|
51 |
/** |
|
52 |
* Lazy load KR. |
|
53 |
* |
|
54 |
* @param name |
|
55 |
* @param conf |
|
56 |
* @return |
|
57 |
* @throws BackingStoreException |
|
58 |
*/ |
|
59 |
@SuppressWarnings("unchecked") |
|
60 |
public static KnowledgeRepository createKnowledgeRepository(String name, Preferences preferences) throws BackingStoreException { |
|
61 |
HashMap<String, HashMap<String, ?>> conf = KRAnnotationEngine.getKnowledgeRepositoryConfiguration(name, preferences); |
|
62 |
return createKnowledgeRepository(name, conf); |
|
63 |
} |
|
47 | 64 |
|
48 | 65 |
/** |
49 | 66 |
* Lazy load KR. |
... | ... | |
128 | 145 |
* |
129 | 146 |
* @param name |
130 | 147 |
* @return |
148 |
* @throws BackingStoreException |
|
131 | 149 |
*/ |
132 |
public static KnowledgeRepository getKnowledgeRepository(String name, MainCorpus corpus) { |
|
150 |
public static KnowledgeRepository getKnowledgeRepository(String name, MainCorpus corpus) throws BackingStoreException {
|
|
133 | 151 |
KnowledgeRepository kr = null; |
134 | 152 |
|
135 | 153 |
File krconf = new File(Toolbox.getTxmHomePath(),"repositories/"+name+"/conf.xml"); |
136 | 154 |
|
137 |
if (repositories.containsKey(name)) { |
|
155 |
if (repositories.containsKey(name)) { // kr already loaded
|
|
138 | 156 |
kr = repositories.get(name); |
139 | 157 |
} else { |
140 |
|
|
141 | 158 |
if (krconf.exists()) { |
142 | 159 |
try { |
143 | 160 |
kr = createKnowledgeRepository(name, DomUtils.load(krconf).getDocumentElement()); |
... | ... | |
154 | 171 |
} |
155 | 172 |
} |
156 | 173 |
|
174 |
// test if the corpus has a new configuration for this KR |
|
157 | 175 |
//FIXME read KR configuration from Project preferences |
158 |
BaseOldParameters bp = null;//corpus.getProject().getProjectParameters(); |
|
159 |
Element krCorpusElement = KRAnnotationEngine.getKnowledgeRepositoryElement(bp, name); |
|
160 |
if (krCorpusElement == null) krCorpusElement = KRAnnotationEngine.createKnowledgeRepositoryElement(bp, name); |
|
161 |
String sKRCorpusVersion = krCorpusElement.getAttribute("version"); |
|
162 |
|
|
176 |
Project project = corpus.getProject(); |
|
177 |
Preferences corpusprefkrconf = project.getPreferencesScope().getNode("KnowledgeRepository").node(name); |
|
178 |
String sKRCorpusVersion = corpusprefkrconf.get("version", "0"); |
|
163 | 179 |
if (kr != null) { |
164 | 180 |
int version = kr.getVersion(); |
165 | 181 |
|
... | ... | |
172 | 188 |
} else { |
173 | 189 |
// replace the KR |
174 | 190 |
System.out.println("REPLACE OLD KR="+kr+" (version="+version+") WITH corpus KR configuration (version="+krCorpusVersion+")"); |
175 |
KnowledgeRepository krNew = createKnowledgeRepository(name, krCorpusElement);
|
|
191 |
KnowledgeRepository krNew = createKnowledgeRepository(name, corpusprefkrconf);
|
|
176 | 192 |
|
177 |
try { |
|
178 |
krconf.getParentFile().mkdirs(); |
|
179 |
DomUtils.save(krCorpusElement, krconf); |
|
193 |
try { // write the new configuration file in krconf File |
|
194 |
krNew.saveConfiguration(krconf); |
|
180 | 195 |
} catch (Exception e) { |
181 | 196 |
System.out.println("Fail to create KR from corpus configuration: "+corpus); |
182 | 197 |
Log.printStackTrace(e); |
... | ... | |
192 | 207 |
} else { |
193 | 208 |
// replace the KR |
194 | 209 |
//System.out.println("CREATE KR="+kr+" WITH corpus KR configuration"); |
195 |
KnowledgeRepository krNew = createKnowledgeRepository(name, krCorpusElement); |
|
210 |
KnowledgeRepository krNew = createKnowledgeRepository(name, corpusprefkrconf); |
|
211 |
|
|
212 |
if (krNew != null) { |
|
213 |
repositories.put(name, krNew); |
|
214 |
kr = krNew; |
|
215 |
} else { |
|
216 |
System.out.println("Internal error: the new KR is null: "+name); |
|
217 |
return null; |
|
218 |
} |
|
219 |
|
|
196 | 220 |
try { |
197 |
krconf.getParentFile().mkdirs(); |
|
198 |
DomUtils.save(krCorpusElement, krconf); |
|
221 |
krNew.saveConfiguration(krconf); |
|
199 | 222 |
} catch (Exception e) { |
200 | 223 |
System.out.println("Fail to create KR from corpus configuration: "+corpus); |
201 | 224 |
Log.printStackTrace(e); |
202 | 225 |
return kr; |
203 | 226 |
} |
204 |
if (krNew != null) { |
|
205 |
repositories.put(name, krNew); |
|
206 |
kr = krNew; |
|
207 |
} else { |
|
208 |
System.out.println("Internal error: the new KR is null: "+name); |
|
209 |
} |
|
227 |
|
|
228 |
|
|
210 | 229 |
} |
211 | 230 |
|
212 | 231 |
return kr; |
... | ... | |
219 | 238 |
public static boolean[] mustLoginToKnowledgeRepository(String kr_name, |
220 | 239 |
MainCorpus corpus) { |
221 | 240 |
|
222 |
KnowledgeRepository kr = getKnowledgeRepository(kr_name, corpus); |
|
241 |
KnowledgeRepository kr; |
|
242 |
try { |
|
243 |
kr = getKnowledgeRepository(kr_name, corpus); |
|
244 |
} catch (BackingStoreException e) { |
|
245 |
// TODO Auto-generated catch block |
|
246 |
e.printStackTrace(); |
|
247 |
return KnowledgeRepository.FALSES; |
|
248 |
} |
|
223 | 249 |
if (kr == null) return KnowledgeRepository.FALSES; |
224 | 250 |
|
225 | 251 |
return kr.mustLoginToKnowledgeRepository(); |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/repository/KnowledgeRepository.java (revision 1141) | ||
---|---|---|
1 | 1 |
package org.txm.annotation.kr.core.repository; |
2 | 2 |
|
3 |
import java.io.File; |
|
3 | 4 |
import java.net.URL; |
4 | 5 |
import java.util.HashMap; |
5 | 6 |
import java.util.List; |
... | ... | |
460 | 461 |
public void setStrings(HashMap<String, HashMap<String, String>> strings) { |
461 | 462 |
this.strings = strings; |
462 | 463 |
} |
464 |
|
|
465 |
public void saveConfiguration(File krconf) { |
|
466 |
// TODO Auto-generated method stub |
|
467 |
|
|
468 |
} |
|
463 | 469 |
} |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/AnnotationWriter.java (revision 1141) | ||
---|---|---|
10 | 10 |
import javax.xml.stream.XMLStreamException; |
11 | 11 |
|
12 | 12 |
import org.apache.commons.lang.StringUtils; |
13 |
import org.osgi.service.prefs.BackingStoreException; |
|
13 | 14 |
import org.txm.Toolbox; |
14 | 15 |
import org.txm.annotation.kr.core.repository.AnnotationEffect; |
15 | 16 |
import org.txm.annotation.kr.core.repository.AnnotationType; |
... | ... | |
33 | 34 |
private List<AnnotationType> types; |
34 | 35 |
private KnowledgeRepository defaultKR; |
35 | 36 |
|
36 |
public AnnotationWriter(MainCorpus corpus){ |
|
37 |
public AnnotationWriter(MainCorpus corpus) throws BackingStoreException{
|
|
37 | 38 |
this.corpus = corpus; |
38 | 39 |
defaultKR = KRAnnotationEngine.getKnowledgeRepository(corpus, KRAnnotationEngine.getKnowledgeRepositoryNames(corpus).get(0)); |
39 | 40 |
types = defaultKR.getAllAnnotationTypes(); |
... | ... | |
131 | 132 |
* @throws CqiClientException |
132 | 133 |
* @throws InvalidCqpIdException |
133 | 134 |
* @throws XMLStreamException |
135 |
* @throws BackingStoreException |
|
134 | 136 |
*/ |
135 |
public boolean writeAnnotations(List<Annotation> allCorpusAnnotations) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException{ |
|
137 |
public boolean writeAnnotations(List<Annotation> allCorpusAnnotations) throws IOException, CqiServerError, CqiClientException, InvalidCqpIdException, XMLStreamException, BackingStoreException{
|
|
136 | 138 |
//MainCorpus corpus = CorpusManager.getCorpusManager().getCorpus(c.getName()); |
137 | 139 |
System.out.println("Saving "+allCorpusAnnotations.size()+"annotations..."); |
138 | 140 |
|
... | ... | |
197 | 199 |
ArrayList<Annotation> allSegmentAnnotations = new ArrayList<Annotation>(); |
198 | 200 |
ArrayList<Annotation> allTokenAnnotations = new ArrayList<Annotation>(); |
199 | 201 |
|
200 |
KnowledgeRepository defaultKR = KRAnnotationEngine.getKnowledgeRepository(corpus, KRAnnotationEngine.getKnowledgeRepositoryNames(corpus).get(0)); |
|
201 | 202 |
System.out.println("Using KR="+defaultKR); |
202 | 203 |
for (Annotation a : allAnnotations) { |
203 | 204 |
AnnotationType type = defaultKR.getType(a.getType()); |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/CQPAnnotationManager.java (revision 1141) | ||
---|---|---|
5 | 5 |
import java.util.HashMap; |
6 | 6 |
import java.util.List; |
7 | 7 |
|
8 |
import org.osgi.service.prefs.BackingStoreException; |
|
8 | 9 |
import org.txm.Toolbox; |
9 | 10 |
import org.txm.annotation.kr.core.repository.AnnotationEffect; |
10 | 11 |
import org.txm.annotation.kr.core.repository.AnnotationType; |
... | ... | |
79 | 80 |
|
80 | 81 |
// first get known annotations from corpus's KR |
81 | 82 |
for (String kr_name : KRAnnotationEngine.getKnowledgeRepositoryNames(corpus)) { |
82 |
KnowledgeRepository kr = KRAnnotationEngine.getKnowledgeRepository(corpus, kr_name); |
|
83 |
if (kr == null) continue; |
|
84 |
List<AnnotationType> types = kr.getAllAnnotationTypes(); |
|
85 |
for (AnnotationType t : types) { |
|
86 |
StructuralUnit su = corpus.getStructuralUnit(t.getId().toLowerCase()); |
|
87 |
if (su != null) { |
|
88 |
StructuralUnitProperty sup = su.getProperty(REF); |
|
89 |
if (sup != null) { |
|
90 |
supList.put(sup, t); |
|
83 |
KnowledgeRepository kr; |
|
84 |
try { |
|
85 |
kr = KRAnnotationEngine.getKnowledgeRepository(corpus, kr_name); |
|
86 |
if (kr == null) continue; |
|
87 |
List<AnnotationType> types = kr.getAllAnnotationTypes(); |
|
88 |
for (AnnotationType t : types) { |
|
89 |
StructuralUnit su = corpus.getStructuralUnit(t.getId().toLowerCase()); |
|
90 |
if (su != null) { |
|
91 |
StructuralUnitProperty sup = su.getProperty(REF); |
|
92 |
if (sup != null) { |
|
93 |
supList.put(sup, t); |
|
94 |
} |
|
91 | 95 |
} |
92 | 96 |
} |
97 |
} catch (BackingStoreException e) { |
|
98 |
// TODO Auto-generated catch block |
|
99 |
e.printStackTrace(); |
|
93 | 100 |
} |
94 | 101 |
} |
95 |
|
|
96 | 102 |
|
97 | 103 |
for (StructuralUnit su : corpus.getStructuralUnits()) { |
98 | 104 |
if (su.getName().equals("text") || su.getName().equals("txmcorpus") || |
tmp/org.txm.core/src/java/org/txm/core/results/TXMResult.java (revision 1141) | ||
---|---|---|
1764 | 1764 |
|
1765 | 1765 |
this.monitor = monitor; |
1766 | 1766 |
|
1767 |
|
|
1767 | 1768 |
if (!this.needsFullRecomputing && !this.isDirtyFromHistory() && !this.isDirty()) { |
1769 |
// in this branch: needsFullRecomputing == false && isDirtyFromHistory() == false && isDirty() == false |
|
1768 | 1770 |
Log.finest("TXMResult.compute(): " + this.getClass().getSimpleName() + ": result parameters have not changed since last computing, computing skipped."); |
1769 | 1771 |
skipComputing = true; |
1770 | 1772 |
} |
... | ... | |
1775 | 1777 |
Log.finest("TXMResult.compute(): " + this.getClass().getSimpleName() + ": computing result of type " + this.getClass() + "..."); |
1776 | 1778 |
|
1777 | 1779 |
|
1778 |
// // TODO the following lines are a lazy shortcut and need to be cleaned up
|
|
1780 |
// TODO the following lines are a lazy shortcut and need to be cleaned up |
|
1779 | 1781 |
// TODO where do we put this parent compute ? :o |
1780 | 1782 |
// if (parent != null && !parent.getHasBeenComputedOnce()) { // parent must be computed at least one time |
1781 | 1783 |
// SJ: other way, test the object itself |
tmp/org.txm.core/src/java/org/txm/importer/xtz/ImportModule.java (revision 1141) | ||
---|---|---|
183 | 183 |
return files; |
184 | 184 |
} |
185 | 185 |
|
186 |
|
|
187 | 186 |
public void end() { |
188 | 187 |
File paramFile = new File(binaryDirectory, "import.xml"); |
189 | 188 |
try { |
tmp/org.txm.index.core/src/org/txm/index/core/functions/___Lexicon2.java (revision 1141) | ||
---|---|---|
104 | 104 |
* Creates a not computed lexicon from the specified corpus. |
105 | 105 |
* @param parent |
106 | 106 |
*/ |
107 |
public ___Lexicon2(TXMResult parent) {
|
|
107 |
public ___Lexicon2(CQPCorpus parent) {
|
|
108 | 108 |
super(parent); |
109 | 109 |
} |
110 | 110 |
|
tmp/org.txm.index.core/src/org/txm/index/core/functions/PartitionIndex.java (revision 1141) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2016-10-21 11:09:43 +0200 (Fri, 21 Oct 2016) $ |
|
25 |
// $LastChangedRevision: 3323 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.index.core.functions; |
|
29 |
|
|
30 |
import java.io.BufferedWriter; |
|
31 |
import java.io.File; |
|
32 |
import java.io.FileOutputStream; |
|
33 |
import java.io.IOException; |
|
34 |
import java.io.OutputStreamWriter; |
|
35 |
import java.util.ArrayList; |
|
36 |
import java.util.Arrays; |
|
37 |
import java.util.Collections; |
|
38 |
import java.util.HashMap; |
|
39 |
import java.util.LinkedHashMap; |
|
40 |
import java.util.List; |
|
41 |
|
|
42 |
import org.eclipse.osgi.util.NLS; |
|
43 |
import org.rosuda.REngine.REXPMismatchException; |
|
44 |
import org.txm.core.preferences.TXMPreferences; |
|
45 |
import org.txm.core.results.Parameter; |
|
46 |
import org.txm.core.results.TXMParameters; |
|
47 |
import org.txm.core.results.TXMResult; |
|
48 |
import org.txm.index.core.functions.LineComparator.SortMode; |
|
49 |
import org.txm.index.core.messages.IndexCoreMessages; |
|
50 |
import org.txm.index.core.preferences.IndexPreferences; |
|
51 |
import org.txm.searchengine.core.Match; |
|
52 |
import org.txm.searchengine.core.Query; |
|
53 |
import org.txm.searchengine.core.Selection; |
|
54 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
55 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
|
56 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
|
57 |
import org.txm.searchengine.cqp.corpus.Part; |
|
58 |
import org.txm.searchengine.cqp.corpus.Partition; |
|
59 |
import org.txm.searchengine.cqp.corpus.Property; |
|
60 |
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty; |
|
61 |
import org.txm.searchengine.cqp.corpus.WordProperty; |
|
62 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
|
63 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
64 |
import org.txm.statsengine.r.core.RWorkspace; |
|
65 |
import org.txm.statsengine.r.core.exceptions.RWorkspaceException; |
|
66 |
import org.txm.utils.logger.Log; |
|
67 |
|
|
68 |
/** |
|
69 |
* Computes an index of a corpus or a partition. |
|
70 |
* |
|
71 |
* @author mdecorde |
|
72 |
*/ |
|
73 |
public class PartitionIndex extends TXMResult { |
|
74 |
|
|
75 |
/** The counts. */ |
|
76 |
protected LinkedHashMap<String, ArrayList<Integer>> counts = new LinkedHashMap<String, ArrayList<Integer>>(); |
|
77 |
|
|
78 |
/** The currentpartid. */ |
|
79 |
protected int currentpartid = 0; |
|
80 |
|
|
81 |
/** The current Fmax value. */ |
|
82 |
protected int Fmax = 0; |
|
83 |
/** The current Fmin value. */ |
|
84 |
protected int Fmin = 9999999; |
|
85 |
|
|
86 |
/** The lexicon used to compute the index (if query == null || query == [] && properties.size() == 1). */ |
|
87 |
@Deprecated |
|
88 |
protected Lexicon lexicon; |
|
89 |
@Deprecated |
|
90 |
protected boolean lexiconMode = false; |
|
91 |
|
|
92 |
|
|
93 |
/** The current lines. */ |
|
94 |
protected List<Line> lines = new ArrayList<Line>(); |
|
95 |
/** The total number of matches (tokens) accumulated while scanning; returned by getT(). */ |
|
96 |
protected int nTotalTokens = 0; |
|
97 |
|
|
98 |
/** The partnames. The corpus name if built with a Corpus or the parts names if built with a Partition */ |
|
99 |
protected List<String> partnames = new ArrayList<String>(); |
|
100 |
|
|
101 |
/** The writer. */ |
|
102 |
// FIXME: should be in an exporter extension |
|
103 |
@Deprecated |
|
104 |
private BufferedWriter writer; |
|
105 |
|
|
106 |
/** |
|
107 |
* Maximum frequency filter value. |
|
108 |
*/ |
|
109 |
@Parameter(key=TXMPreferences.F_MAX) |
|
110 |
protected Integer pFmaxFilter; |
|
111 |
|
|
112 |
/** |
|
113 |
* Minimum frequency filter value. |
|
114 |
*/ |
|
115 |
@Parameter(key=TXMPreferences.F_MIN) |
|
116 |
protected Integer pFminFilter; |
|
117 |
|
|
118 |
/** |
|
119 |
* Number of lines to display per page. |
|
120 |
*/ |
|
121 |
@Parameter(key=TXMPreferences.N_LINES_PER_PAGE) |
|
122 |
protected Integer pNLinesPerPage; |
|
123 |
|
|
124 |
/** |
|
125 |
* The word properties to display. |
|
126 |
*/ |
|
127 |
@Parameter(key=TXMPreferences.UNIT_PROPERTIES) |
|
128 |
protected List<WordProperty> pProperties; |
|
129 |
|
|
130 |
/** |
|
131 |
* The string used to separate property values. |
|
132 |
*/ |
|
133 |
@Parameter(key=IndexPreferences.PROPERTIES_SEPARATOR) |
|
134 |
protected String pPropertiesSeparator; |
|
135 |
|
|
136 |
/** |
|
137 |
* The CQP query. |
|
138 |
*/ |
|
139 |
@Parameter(key=TXMPreferences.QUERY) |
|
140 |
protected Query pQuery; |
|
141 |
|
|
142 |
/** |
|
143 |
* The line index of the current index page. |
|
144 |
*/ |
|
145 |
@Parameter(key=IndexPreferences.N_TOP_INDEX) |
|
146 |
private Integer pTopIndex; |
|
147 |
|
|
148 |
/** |
|
149 |
* The vmax filter value parameter. |
|
150 |
*/ |
|
151 |
@Parameter(key=TXMPreferences.V_MAX) |
|
152 |
protected Integer pVmaxFilter; |
|
153 |
|
|
154 |
/**
 * Creates an index result whose parent is the given partition.
 *
 * @param parent the partition forwarded to the {@code TXMResult} constructor
 */
public PartitionIndex(Partition parent) {
	super(parent);
}
|
161 |
|
|
162 |
/**
 * Creates an index result from a parameters node path.
 *
 * @param parametersNodePath the path forwarded unchanged to the {@code TXMResult} constructor
 */
public PartitionIndex(String parametersNodePath) {
	super(parametersNodePath);
}
|
169 |
|
|
170 |
|
|
171 |
@Override |
|
172 |
protected boolean _compute() throws Exception { |
|
173 |
lines.clear(); |
|
174 |
counts.clear(); |
|
175 |
partnames.clear(); |
|
176 |
currentpartid = 0; |
|
177 |
nTotalTokens = 0; |
|
178 |
|
|
179 |
Partition partition = (Partition)parent; |
|
180 |
for (Part part : partition.getParts()) { |
|
181 |
scanCorpus(part); |
|
182 |
currentpartid++; |
|
183 |
partnames.add(part.getName()); |
|
184 |
} |
|
185 |
|
|
186 |
setLineCounts(); |
|
187 |
|
|
188 |
getAllLines(); |
|
189 |
|
|
190 |
this.filterLines(); |
|
191 |
|
|
192 |
this.subTask("Sorting..."); |
|
193 |
|
|
194 |
this.sortLines(SortMode.FREQUNIT, true); |
|
195 |
|
|
196 |
this.cut(); |
|
197 |
|
|
198 |
this.dirty = false; |
|
199 |
this.pTopIndex = 0; |
|
200 |
|
|
201 |
this.subTask("Index done."); |
|
202 |
|
|
203 |
return true; |
|
204 |
} |
|
205 |
|
|
206 |
|
|
207 |
|
|
208 |
/** |
|
209 |
* Creates a CQL query string from the specified Index lines. |
|
210 |
* |
|
211 |
* @param lines |
|
212 |
* @return the query |
|
213 |
*/ |
|
214 |
public static String createQuery(List<Line> lines) { |
|
215 |
String query = ""; //$NON-NLS-1$ |
|
216 |
if (lines.size() == 0) { |
|
217 |
return query; |
|
218 |
} |
|
219 |
|
|
220 |
Line line = lines.get(0); |
|
221 |
int nbToken = line.getUnitsProperties().get(0).size(); |
|
222 |
int nbProps = line.getProperties().size(); |
|
223 |
int nbLines = lines.size(); |
|
224 |
List<Property> props = line.getProperties(); |
|
225 |
for (int t = 0; t < nbToken; t++) { |
|
226 |
query += "["; //$NON-NLS-1$ |
|
227 |
for (int p = 0; p < nbProps; p++) { |
|
228 |
if (props.get(p) instanceof StructuralUnitProperty) { |
|
229 |
query += "_."+((StructuralUnitProperty)props.get(p)).getFullName() + "=\""; //$NON-NLS-1$ //$NON-NLS-2$ |
|
230 |
} |
|
231 |
else { |
|
232 |
query += props.get(p) + "=\""; //$NON-NLS-1$ |
|
233 |
} |
|
234 |
for (int l = 0; l < nbLines; l++) { |
|
235 |
line = lines.get(l); |
|
236 |
List<List<String>> values = line.getUnitsProperties(); |
|
237 |
String s = values.get(p).get(t); |
|
238 |
s = CQLQuery.addBackSlash(s); |
|
239 |
query += s + "|"; //$NON-NLS-1$ |
|
240 |
} |
|
241 |
query = query.substring(0, query.length() - 1); |
|
242 |
query += "\" & "; //$NON-NLS-1$ |
|
243 |
} |
|
244 |
query = query.substring(0, query.length() - 3); |
|
245 |
query += "] "; //$NON-NLS-1$ |
|
246 |
} |
|
247 |
query = query.substring(0, query.length() - 1); |
|
248 |
return query; |
|
249 |
} |
|
250 |
|
|
251 |
|
|
252 |
/** |
|
253 |
* Creates a Query list from the specified Index lines. |
|
254 |
* @param lines |
|
255 |
* @return |
|
256 |
*/ |
|
257 |
public static List<CQLQuery> createQueries(List<Line> lines) { |
|
258 |
List<CQLQuery> queries = new ArrayList<CQLQuery>(); |
|
259 |
for (Line line : lines) { |
|
260 |
String query = ""; //$NON-NLS-1$ |
|
261 |
int nbToken = line.getUnitsProperties().get(0).size(); |
|
262 |
int nbProps = line.getProperties().size(); |
|
263 |
List<List<String>> values = line.getUnitsProperties(); |
|
264 |
List<Property> props = line.getProperties(); |
|
265 |
for (int t = 0; t < nbToken; t++) { |
|
266 |
query += "["; //$NON-NLS-1$ |
|
267 |
for (int p = 0; p < nbProps; p++) { |
|
268 |
query += props.get(p).getName() + "=\""; //$NON-NLS-1$ |
|
269 |
String s = values.get(p).get(t); |
|
270 |
s = CQLQuery.addBackSlash(s); |
|
271 |
query += s; |
|
272 |
query += "\" & "; //$NON-NLS-1$ |
|
273 |
} |
|
274 |
query = query.substring(0, query.length() - 3); |
|
275 |
query += "] "; //$NON-NLS-1$ |
|
276 |
} |
|
277 |
queries.add(new CQLQuery(query)); |
|
278 |
} |
|
279 |
return queries; |
|
280 |
|
|
281 |
} |
|
282 |
|
|
283 |
|
|
284 |
|
|
285 |
/** |
|
286 |
* This method alter the index first column frequencies using a table stored in the R workspace |
|
287 |
* |
|
288 |
* @param referenceCorpus the R table variable name |
|
289 |
* @return true if frequencies have been altered |
|
290 |
* |
|
291 |
* @throws RWorkspaceException |
|
292 |
* @throws REXPMismatchException |
|
293 |
*/ |
|
294 |
public boolean alterFrequencies(String referenceCorpus) throws RWorkspaceException, REXPMismatchException { |
|
295 |
String[] ref_forms = RWorkspace.getRWorkspaceInstance().eval("rownames("+referenceCorpus+")").asStrings(); //$NON-NLS-1$ //$NON-NLS-2$ |
|
296 |
int[] ref_freqs = RWorkspace.getRWorkspaceInstance().eval(referenceCorpus+"[,1]").asIntegers(); //$NON-NLS-1$ |
|
297 |
if (ref_forms.length != ref_freqs.length) { |
|
298 |
System.out.println("Cannot alter index frequencies with the '"+referenceCorpus+"' empty table."); |
|
299 |
return false; |
|
300 |
} |
|
301 |
HashMap<String, Integer> ref_counts = new HashMap<String, Integer>(); |
|
302 |
for(int i = 0 ; i < ref_forms.length ; i++) { |
|
303 |
ref_counts.put(ref_forms[i], ref_freqs[i]); |
|
304 |
} |
|
305 |
|
|
306 |
for (org.txm.index.core.functions.Line l : this.getAllLines()) { |
|
307 |
String key = l.toString(); |
|
308 |
if (ref_counts.containsKey(key)) { |
|
309 |
int[] f = {ref_counts.get(key)}; |
|
310 |
l.setCounts(f, 0); |
|
311 |
} |
|
312 |
} |
|
313 |
this.updateFminFmax(); |
|
314 |
return true; |
|
315 |
//voc.toTxt(new File("/home/mdecorde/TEMP/after.tsv"), "UTF-8", "\t", ""); |
|
316 |
} |
|
317 |
|
|
318 |
@Override |
|
319 |
public boolean canCompute() { |
|
320 |
if (getCorpus() == null && getPartition() == null) { |
|
321 |
Log.severe("Corpus or partition not set."); //$NON-NLS-1$ |
|
322 |
return false; |
|
323 |
} |
|
324 |
|
|
325 |
if (pProperties == null || pProperties.size() == 0) { |
|
326 |
Log.severe("No property set."); //$NON-NLS-1$ |
|
327 |
return false; |
|
328 |
} |
|
329 |
|
|
330 |
if (pQuery == null || pQuery.getQueryString().length() == 0) { |
|
331 |
Log.severe("No query set."); //$NON-NLS-1$ |
|
332 |
return false; |
|
333 |
} |
|
334 |
|
|
335 |
return true; |
|
336 |
} |
|
337 |
|
|
338 |
@Override |
|
339 |
public boolean saveParameters() { |
|
340 |
|
|
341 |
this.saveParameter(TXMPreferences.UNIT_PROPERTIES, WordProperty.propertiesToString(pProperties)); |
|
342 |
|
|
343 |
if (pQuery != null) { |
|
344 |
this.saveParameter(TXMPreferences.QUERY, pQuery.getQueryString()); |
|
345 |
} |
|
346 |
|
|
347 |
|
|
348 |
return true; |
|
349 |
} |
|
350 |
|
|
351 |
@Override |
|
352 |
public boolean loadParameters() { |
|
353 |
this.pProperties = (List<WordProperty>) Property.stringToProperties(getCorpus(), this.getStringParameterValue(TXMPreferences.UNIT_PROPERTIES)); |
|
354 |
this.pQuery = new CQLQuery(this.getStringParameterValue(TXMPreferences.QUERY)); |
|
355 |
return true; |
|
356 |
} |
|
357 |
|
|
358 |
@Override |
|
359 |
public void clean() { |
|
360 |
try { |
|
361 |
if (this.writer != null) { |
|
362 |
this.writer.flush(); |
|
363 |
this.writer.close(); |
|
364 |
} |
|
365 |
} catch (IOException e) { |
|
366 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
367 |
} |
|
368 |
} |
|
369 |
|
|
370 |
/** |
|
371 |
* keep the vmax lines more frequents. |
|
372 |
* |
|
373 |
*/ |
|
374 |
public void cut() { |
|
375 |
if (pVmaxFilter == null) { |
|
376 |
return; |
|
377 |
} |
|
378 |
|
|
379 |
this.acquireSemaphore(); |
|
380 |
|
|
381 |
Log.info("Cutting Tmax="+pVmaxFilter); |
|
382 |
// assume the lines are sorted |
|
383 |
//int before = lines.size(); |
|
384 |
this.lines = this.lines.subList(0, Math.min(lines.size(), pVmaxFilter)); |
|
385 |
this.updateFminFmax(); |
|
386 |
|
|
387 |
this.releaseSemaphore(); |
|
388 |
} |
|
389 |
|
|
390 |
/** |
|
391 |
* Equals. |
|
392 |
* |
|
393 |
* @param index the index |
|
394 |
* @return true, if successful |
|
395 |
*/ |
|
396 |
public boolean equals(PartitionIndex index) { |
|
397 |
try { |
|
398 |
return this.pQuery.equals(index.getQuery()) |
|
399 |
&& this.pProperties.equals(index.getProperties()) |
|
400 |
&& this.pFminFilter == index.getFilterFmin() |
|
401 |
&& this.pFmaxFilter == index.getFilterFmax(); |
|
402 |
} |
|
403 |
catch(Exception e) { |
|
404 |
} |
|
405 |
return false; |
|
406 |
} |
|
407 |
|
|
408 |
|
|
409 |
/** |
|
410 |
* Removes lines with frequency not in [Fmin,Fmax] range. |
|
411 |
* |
|
412 |
**/ |
|
413 |
public void filterLines() { |
|
414 |
|
|
415 |
if (!(pFminFilter > 0 && pFmaxFilter > 0 && pFminFilter <= pFmaxFilter)) { |
|
416 |
return; |
|
417 |
} |
|
418 |
|
|
419 |
Log.info("Filtering Fmin = " + pFminFilter + " and Fmax = " + pFmaxFilter); //$NON-NLS-1$ //$NON-NLS-2$ |
|
420 |
|
|
421 |
for (int i = 0; i < lines.size(); i++) { // for each line |
|
422 |
|
|
423 |
Line line = lines.get(i); |
|
424 |
int f = line.getFrequency(); |
|
425 |
if (f < pFminFilter) { // if its frequency is not in the interval, remove it |
|
426 |
|
|
427 |
nTotalTokens -= line.getFrequency(); |
|
428 |
lines.remove(i); |
|
429 |
i--; |
|
430 |
continue; // no need to go further the line is removed |
|
431 |
} |
|
432 |
if (f > pFmaxFilter) { // if its frequency is not in the interval, remove it |
|
433 |
|
|
434 |
nTotalTokens -= line.getFrequency(); |
|
435 |
lines.remove(i); |
|
436 |
i--; |
|
437 |
} |
|
438 |
} |
|
439 |
|
|
440 |
this.updateFminFmax(); |
|
441 |
} |
|
442 |
|
|
443 |
/** |
|
444 |
* Gets the corpus. |
|
445 |
* |
|
446 |
* @return the corpus |
|
447 |
*/ |
|
448 |
public CQPCorpus getCorpus() { |
|
449 |
if (this.parent instanceof CQPCorpus) { |
|
450 |
return (CQPCorpus) this.parent; |
|
451 |
} else if (this.parent instanceof Partition) { |
|
452 |
return ((Partition)this.parent).getParent(); |
|
453 |
} else if (this.lexicon != null) { |
|
454 |
return this.lexicon.getParent(); |
|
455 |
} else { |
|
456 |
return null; |
|
457 |
} |
|
458 |
} |
|
459 |
|
|
460 |
@Override |
|
461 |
public String getDetails() { |
|
462 |
if (lexicon != null) { |
|
463 |
return lexicon.getDetails(); |
|
464 |
} else { |
|
465 |
|
|
466 |
try { |
|
467 |
Object[] params = new Object[]{this.parent.getSimpleName(), this.getQuery().getQueryString(), this.getProperties(), this.getFmin(), this.getFmax()}; |
|
468 |
String str; |
|
469 |
if(this.parent instanceof Partition) { |
|
470 |
str = IndexCoreMessages.DetailsFromPartition; |
|
471 |
} |
|
472 |
else { |
|
473 |
str = IndexCoreMessages.DetailsFromCorpus; |
|
474 |
} |
|
475 |
return NLS.bind(str, params); |
|
476 |
} |
|
477 |
catch(Exception e) { |
|
478 |
return getName(); |
|
479 |
} |
|
480 |
} |
|
481 |
} |
|
482 |
|
|
483 |
/** |
|
484 |
* Gets the filter fmax. |
|
485 |
* |
|
486 |
* @return the filter fmax |
|
487 |
*/ |
|
488 |
public Integer getFilterFmax() { |
|
489 |
return pFmaxFilter; |
|
490 |
} |
|
491 |
|
|
492 |
/** |
|
493 |
* Gets the filter fmin. |
|
494 |
* |
|
495 |
* @return the filter fmin |
|
496 |
*/ |
|
497 |
public Integer getFilterFmin() { |
|
498 |
return pFminFilter; |
|
499 |
} |
|
500 |
|
|
501 |
/** |
|
502 |
* Gets the filter vmax. |
|
503 |
* |
|
504 |
* @return the filter vmax |
|
505 |
*/ |
|
506 |
public Integer getFilterVmax() { |
|
507 |
return pVmaxFilter; |
|
508 |
} |
|
509 |
|
|
510 |
/** |
|
511 |
* Gets the fmax. |
|
512 |
* |
|
513 |
* @return the fmax |
|
514 |
*/ |
|
515 |
public int getFmax() { |
|
516 |
return Fmax; |
|
517 |
} |
|
518 |
|
|
519 |
/** |
|
520 |
* Gets the fmin. |
|
521 |
* |
|
522 |
* @return the fmin |
|
523 |
*/ |
|
524 |
public int getFmin() { |
|
525 |
return Fmin; |
|
526 |
} |
|
527 |
|
|
528 |
/** |
|
529 |
* If the index has been build with corpus + property, the method returns the lexicon used. |
|
530 |
* |
|
531 |
* @return the lexicon |
|
532 |
*/ |
|
533 |
public Lexicon getLexicon() { |
|
534 |
return lexicon; |
|
535 |
} |
|
536 |
|
|
537 |
public boolean getLexiconMode() { |
|
538 |
return this.lexiconMode; |
|
539 |
} |
|
540 |
|
|
541 |
/** |
|
542 |
* return the lines from le "start"th one to the "end"th one. |
|
543 |
* |
|
544 |
* @param start the start |
|
545 |
* @param end the end |
|
546 |
* @return the lines |
|
547 |
*/ |
|
548 |
public List<Line> getLines(int start, int end) { |
|
549 |
// long time = System.currentTimeMillis(); |
|
550 |
List<Line> selectedLines = new ArrayList<Line>(); |
|
551 |
if (lines.size() > 0) { |
|
552 |
start = Math.max(0, start); |
|
553 |
end = Math.min(end, lines.size()); |
|
554 |
selectedLines = lines.subList(start, end); |
|
555 |
|
|
556 |
int p = 0; |
|
557 |
// for each property get the string values of the tokens |
|
558 |
for (Property property : pProperties) { |
|
559 |
|
|
560 |
int len = 0; |
|
561 |
for (Line l : selectedLines) { |
|
562 |
len += l.UnitsIds.get(p).length; |
|
563 |
} |
|
564 |
|
|
565 |
int[] indices = new int[len]; // build the array of indices |
|
566 |
len = 0; |
|
567 |
for (Line l : selectedLines) { |
|
568 |
int[] ids = l.UnitsIds.get(p); |
|
569 |
System.arraycopy(ids, 0, indices, len, ids.length); |
|
570 |
len += ids.length; |
|
571 |
} |
|
572 |
String[] strs = null; |
|
573 |
try { |
|
574 |
if (property instanceof StructuralUnitProperty) { |
|
575 |
strs = CorpusManager.getCorpusManager().getCqiClient().struc2Str(property.getQualifiedName(), indices); |
|
576 |
} |
|
577 |
else { |
|
578 |
strs = CorpusManager.getCorpusManager().getCqiClient().id2Str(property.getQualifiedName(), indices); |
|
579 |
} |
|
580 |
} catch (Exception e) { |
|
581 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
582 |
return null; |
|
583 |
} |
|
584 |
len = 0; |
|
585 |
for (Line l : selectedLines) { |
|
586 |
int[] ids = l.UnitsIds.get(p); |
|
587 |
String[] lstr = new String[ids.length]; |
|
588 |
System.arraycopy(strs, len, lstr, 0, ids.length); |
|
589 |
if (l.UnitsProperty.size() == pProperties.size()) continue; // the line is already initialized |
|
590 |
l.put(property, Arrays.asList(lstr)); |
|
591 |
len += ids.length; |
|
592 |
} |
|
593 |
p++; |
|
594 |
} |
|
595 |
} |
|
596 |
// System.out.println("Time get lines "+(System.currentTimeMillis()-time)); |
|
597 |
return selectedLines; |
|
598 |
} |
|
599 |
|
|
600 |
|
|
601 |
/** |
|
602 |
* return all the lines of the index. |
|
603 |
* |
|
604 |
* @return the all lines |
|
605 |
*/ |
|
606 |
public List<Line> getAllLines() { |
|
607 |
return getLines(0, lines.size()); |
|
608 |
} |
|
609 |
|
|
610 |
/** |
|
611 |
* update the lines counts. |
|
612 |
*/ |
|
613 |
protected void setLineCounts() { |
|
614 |
for (Line line : lines) {// for each Line set its count |
|
615 |
int[] c = new int[partnames.size()]; |
|
616 |
for (int i = 0; i < partnames.size(); i++) { |
|
617 |
if (counts.get(line.getSignature()).size() <= i) { |
|
618 |
c[i] = 0; |
|
619 |
} |
|
620 |
else { |
|
621 |
c[i] = counts.get(line.getSignature()).get(i); |
|
622 |
} |
|
623 |
} |
|
624 |
line.setCounts(c, -1); |
|
625 |
} |
|
626 |
} |
|
627 |
|
|
628 |
|
|
629 |
|
|
630 |
|
|
631 |
@Override |
|
632 |
public String getName() { |
|
633 |
try { |
|
634 |
if (lexicon != null) { |
|
635 |
return lexicon.getName(); |
|
636 |
} |
|
637 |
else { |
|
638 |
return IndexCoreMessages.RESULT_TYPE + ": " + this.parent.getSimpleName() + ": " + this.getSimpleName(); |
|
639 |
} |
|
640 |
} |
|
641 |
catch(Exception e) { |
|
642 |
return this.getEmptyName(); |
|
643 |
} |
|
644 |
} |
|
645 |
|
|
646 |
/** |
|
647 |
* Gets the number of lines per page. |
|
648 |
* @return the number of lines per page |
|
649 |
*/ |
|
650 |
public Integer getNLinesPerPage() { |
|
651 |
return pNLinesPerPage ; |
|
652 |
} |
|
653 |
|
|
654 |
|
|
655 |
/** |
|
656 |
* Gets the partition. |
|
657 |
* |
|
658 |
* @return the partition if exists otherwise null |
|
659 |
*/ |
|
660 |
public Partition getPartition() { |
|
661 |
try { |
|
662 |
return (Partition) this.parent; |
|
663 |
} |
|
664 |
catch(ClassCastException e) { |
|
665 |
} |
|
666 |
return null; |
|
667 |
} |
|
668 |
|
|
669 |
/** |
|
670 |
* Gets the part names. |
|
671 |
* |
|
672 |
* @return the part names |
|
673 |
*/ |
|
674 |
public List<String> getPartnames() { |
|
675 |
return partnames; |
|
676 |
} |
|
677 |
|
|
678 |
/** |
|
679 |
* Gets the properties. |
|
680 |
* |
|
681 |
* @return the properties |
|
682 |
*/ |
|
683 |
public List<WordProperty> getProperties() { |
|
684 |
return this.pProperties; |
|
685 |
} |
|
686 |
|
|
687 |
|
|
688 |
/** |
|
689 |
* |
|
690 |
* @return |
|
691 |
*/ |
|
692 |
public String getPropertySeparator() { |
|
693 |
return pPropertiesSeparator ; |
|
694 |
} |
|
695 |
|
|
696 |
/** |
|
697 |
* Gets the query. |
|
698 |
* |
|
699 |
* @return the query used |
|
700 |
*/ |
|
701 |
public Query getQuery() { |
|
702 |
return pQuery; |
|
703 |
} |
|
704 |
|
|
705 |
@Override |
|
706 |
public String getSimpleName() { |
|
707 |
if (lexicon != null) { |
|
708 |
return lexicon.getSimpleName(); |
|
709 |
} |
|
710 |
else { |
|
711 |
String name = ""; //$NON-NLS-1$ |
|
712 |
if (pQuery != null && !pQuery.isEmpty()) { |
|
713 |
name += pQuery.getQueryString(); |
|
714 |
name += ": "; //$NON-NLS-1$ |
|
715 |
|
|
716 |
for (Property s : pProperties) { |
|
717 |
name += s.getName() + pPropertiesSeparator; |
|
718 |
} |
|
719 |
if (pProperties.size() > 0) { |
|
720 |
name = name.substring(0, name.length() - 1); |
|
721 |
} |
|
722 |
return name; |
|
723 |
} |
|
724 |
else { |
|
725 |
return this.getEmptyName(); |
|
726 |
} |
|
727 |
} |
|
728 |
} |
|
729 |
|
|
730 |
/** |
|
731 |
* Gets the number of tokens found. |
|
732 |
* |
|
733 |
* @return the number of tokens returned by the selection |
|
734 |
*/ |
|
735 |
public int getT() { |
|
736 |
return nTotalTokens; |
|
737 |
} |
|
738 |
|
|
739 |
/** |
|
740 |
* |
|
741 |
* @return |
|
742 |
*/ |
|
743 |
public int getTopIndex() { |
|
744 |
return pTopIndex; |
|
745 |
} |
|
746 |
|
|
747 |
/** |
|
748 |
* Gets the v. |
|
749 |
* |
|
750 |
* @return the number of entries in the index |
|
751 |
*/ |
|
752 |
public int getV() { |
|
753 |
return lines.size(); |
|
754 |
} |
|
755 |
|
|
756 |
/** |
|
757 |
* Tell if the index has been computed with a partition or not. |
|
758 |
* |
|
759 |
* @return true, if is computed with partition |
|
760 |
*/ |
|
761 |
public boolean isComputedWithPartition() { |
|
762 |
return (this.getParent() instanceof Partition); |
|
763 |
} |
|
764 |
|
|
765 |
/** |
|
766 |
* count tokens. |
|
767 |
* |
|
768 |
* @param corpus the corpus to scan |
|
769 |
* @return true, if successful |
|
770 |
* @throws CqiClientException |
|
771 |
* @throws CqiServerError |
|
772 |
* @throws IOException |
|
773 |
*/ |
|
774 |
protected boolean scanCorpus(CQPCorpus corpus) throws Exception { |
|
775 |
// get the cqp result of the query |
|
776 |
|
|
777 |
//long time = System.currentTimeMillis(); |
|
778 |
Selection result = null; |
|
779 |
if (pQuery instanceof CQLQuery) { |
|
780 |
result = corpus.query((CQLQuery)pQuery, "index", true); //$NON-NLS-1$ |
|
781 |
} else { |
|
782 |
result = pQuery.getSearchEngine().query(corpus, pQuery, "index", true); //$NON-NLS-1$ |
|
783 |
} |
|
784 |
boolean isTargetUsed = result.isTargetUsed(); |
|
785 |
int nbresults = result.getNMatch(); |
|
786 |
this.nTotalTokens += nbresults; // get number of tokens |
|
787 |
|
|
788 |
// System.out.println("nLines : "+nLines); |
|
789 |
List<? extends Match> matches = null; |
|
790 |
if (nbresults > 0) { |
|
791 |
matches = result.getMatches(0, nbresults - 1); // get the indexes sequences of result's tokens |
|
792 |
} |
|
793 |
else { |
|
794 |
matches = new ArrayList<Match>(); |
|
795 |
} |
|
796 |
// count matches |
|
797 |
// time = System.currentTimeMillis(); |
|
798 |
List<Integer> allpositions = new ArrayList<Integer>(); |
|
799 |
for (int j = 0; j < nbresults; j++) { |
|
800 |
Match match = matches.get(j); |
|
801 |
// beginingOfKeywordsPositions.add(match.getStart()); // get the |
|
802 |
// first index |
|
803 |
// lengthOfKeywords.add(match.size());// get the last index |
|
804 |
if (isTargetUsed) { |
|
805 |
allpositions.add(match.getTarget()); |
|
806 |
} |
|
807 |
else { |
|
808 |
for (int i = match.getStart(); i <= match.getEnd(); i++) { |
|
809 |
allpositions.add(i); |
|
810 |
} |
|
811 |
} |
|
812 |
} |
|
813 |
// System.out.println("get string data"); |
|
814 |
// time = System.currentTimeMillis(); |
|
815 |
// for (Property property : props) {// for each property get the |
|
816 |
// string values of the tokens |
|
817 |
// keywordsViewPropValues.put(property, |
|
818 |
// cache.get(property).getData(beginingOfKeywordsPositions, |
|
819 |
// lengthOfKeywords)); |
|
820 |
// } |
|
821 |
// System.out.println("took "+(System.currentTimeMillis()-time)); |
|
822 |
|
|
823 |
// System.out.println("get count data"); |
|
824 |
|
Formats disponibles : Unified diff