Révision 1395
tmp/org.txm.concordance.rcp/src/org/txm/concordance/rcp/editors/ConcordanceEditor.java (revision 1395) | ||
---|---|---|
1079 | 1079 |
deleteLineButton = new Button(navigationArea, SWT.PUSH); |
1080 | 1080 |
deleteLineButton.setLayoutData(new GridData(GridData.FILL, GridData.FILL, false, false)); |
1081 | 1081 |
deleteLineButton.setToolTipText(ConcordanceUIMessages.deleteSelectedLines); |
1082 |
deleteLineButton.setImage(IImageKeys.getImage(IImageKeys.CONCORDANCE_DELETE));
|
|
1082 |
deleteLineButton.setImage(IImageKeys.getImage(IImageKeys.ACTION_DELETE));
|
|
1083 | 1083 |
deleteLineButton.addSelectionListener(new SelectionListener() { |
1084 | 1084 |
@Override |
1085 | 1085 |
public void widgetSelected(SelectionEvent e) { |
tmp/org.txm.specificities.core/src/org/txm/specificities/core/functions/Specificities.java (revision 1395) | ||
---|---|---|
42 | 42 |
import org.txm.core.results.Parameter; |
43 | 43 |
import org.txm.core.results.TXMParameters; |
44 | 44 |
import org.txm.core.results.TXMResult; |
45 |
import org.txm.index.core.functions.Lexicon; |
|
46 | 45 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
47 | 46 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
48 | 47 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
49 | 48 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
49 |
import org.txm.searchengine.cqp.corpus.CQPLexicon; |
|
50 | 50 |
import org.txm.searchengine.cqp.corpus.Partition; |
51 | 51 |
import org.txm.searchengine.cqp.corpus.Property; |
52 | 52 |
import org.txm.searchengine.cqp.corpus.WordProperty; |
... | ... | |
107 | 107 |
private LexicalTable lexicalTable; |
108 | 108 |
|
109 | 109 |
/** The lexicon. */ |
110 |
private Lexicon lexicon; |
|
110 |
private CQPLexicon lexicon;
|
|
111 | 111 |
|
112 | 112 |
/** The sub lexicon. */ |
113 |
private Lexicon subLexicon; |
|
113 |
private CQPLexicon subLexicon;
|
|
114 | 114 |
|
115 | 115 |
/** The corpus. */ |
116 | 116 |
private CQPCorpus corpus = null; |
... | ... | |
499 | 499 |
* |
500 | 500 |
* @return the lexicon |
501 | 501 |
*/ |
502 |
public Lexicon getLexicon() { |
|
502 |
public CQPLexicon getLexicon() {
|
|
503 | 503 |
return lexicon; |
504 | 504 |
} |
505 | 505 |
|
tmp/org.txm.core/src/java/org/txm/importer/AddAttributeInXml.java (revision 1395) | ||
---|---|---|
109 | 109 |
{ |
110 | 110 |
try { |
111 | 111 |
File xmlfile = new File("/home/mdecorde/xml/xmlçàé/essai.xml"); |
112 |
File temp = File.createTempFile("temp", ".xml", xmlfile.getParentFile());
|
|
112 |
File temp = new File(xmlfile.getAbsolutePath()+"-rez.xml");
|
|
113 | 113 |
String tag = "titre"; |
114 |
ArrayList<Pair<String, String>> metadatas = new ArrayList<Pair<String, String>>(); |
|
115 |
metadatas.add(new Pair<String, String>("name1", "newvalue1")); |
|
116 |
metadatas.add(new Pair<String, String>("name2", "value2")); |
|
117 |
metadatas.add(new Pair<String, String>("name3", "value3")); |
|
114 |
HashMap<String, String> metadatas = new HashMap<String, String>(); |
|
115 |
metadatas.put("name1", "tricky value\""); |
|
116 |
metadatas.put("name2", "value&2"); |
|
117 |
metadatas.put("name3", "value<3"); |
|
118 |
metadatas.put("name4", "value>"); |
|
118 | 119 |
|
119 |
// AddAttributeInXml builder = new AddAttributeInXml(xmlfile, tag, metadatas); |
|
120 |
// builder.onlyOneElement(); |
|
121 |
// if (builder.process(temp)) { |
|
122 |
// if (!(xmlfile.delete() && temp.renameTo(xmlfile))) { |
|
123 |
// System.out.println("Warning can't rename file "+temp+" to "+xmlfile); |
|
124 |
// } |
|
125 |
// } else { |
|
126 |
// System.out.println("Error during process"); |
|
127 |
// } |
|
120 |
AddAttributeInXml builder = new AddAttributeInXml(xmlfile, tag, metadatas); |
|
121 |
builder.onlyOneElement(); |
|
122 |
if (builder.process(temp)) { |
|
123 |
System.out.println("Done: ok."); |
|
124 |
} else { |
|
125 |
System.out.println("Error during process"); |
|
126 |
} |
|
128 | 127 |
} catch(Exception e) { |
129 | 128 |
e.printStackTrace(); |
130 | 129 |
} |
tmp/org.txm.core/src/java/org/txm/Toolbox.java (revision 1395) | ||
---|---|---|
37 | 37 |
import java.text.Collator; |
38 | 38 |
import java.text.DateFormat; |
39 | 39 |
import java.text.SimpleDateFormat; |
40 |
import java.util.ArrayList; |
|
40 | 41 |
import java.util.Arrays; |
42 |
import java.util.HashMap; |
|
41 | 43 |
import java.util.LinkedHashMap; |
42 | 44 |
import java.util.Locale; |
43 | 45 |
import java.util.logging.Level; |
tmp/org.txm.core/src/java/org/txm/scripts/importer/XMLTXM2WTC.groovy (revision 1395) | ||
---|---|---|
289 | 289 |
basewritten = true; |
290 | 290 |
if (attrname == "project") |
291 | 291 |
projectwritten = true; |
292 |
output.write(" "+attrname+"=\""+parser.getAttributeValue(i).replace("\"", "'")+"\"" ); |
|
292 |
|
|
293 |
output.write(" "+attrname+"=\""+parser.getAttributeValue(i).replace("&", "&amp;").replace("\"", "&quot;")+"\"" ); |
|
293 | 294 |
} |
294 | 295 |
|
295 | 296 |
if (localname.equals("text")) |
tmp/org.txm.core/src/java/org/txm/core/results/TXMResult.java (revision 1395) | ||
---|---|---|
13 | 13 |
import java.util.Date; |
14 | 14 |
import java.util.HashMap; |
15 | 15 |
import java.util.List; |
16 |
import java.util.TreeSet; |
|
16 | 17 |
import java.util.UUID; |
17 | 18 |
import java.util.concurrent.Semaphore; |
18 | 19 |
import java.util.regex.Pattern; |
... | ... | |
701 | 702 |
} |
702 | 703 |
} |
703 | 704 |
|
704 |
|
|
705 | 705 |
/** |
706 | 706 |
* Updates the dirty states by comparing TXMResult @Parameter with previously used parameters in the compute() method. |
707 | 707 |
* |
... | ... | |
719 | 719 |
*/ |
720 | 720 |
public final boolean isDirtyFromHistory() throws Exception { |
721 | 721 |
|
722 |
Class clazz = this.getClass(); |
|
723 |
|
|
724 |
Field[] fields = clazz.getDeclaredFields(); |
|
722 |
List<Field> fields = this.getAllFields(); |
|
723 |
|
|
725 | 724 |
for (Field f : fields) { |
726 | 725 |
Parameter parameter = f.getAnnotation(Parameter.class); |
727 | 726 |
if (parameter == null |
... | ... | |
731 | 730 |
) { |
732 | 731 |
continue; |
733 | 732 |
} |
734 |
|
|
735 | 733 |
String name; |
736 | 734 |
if (!parameter.key().isEmpty()) { |
737 | 735 |
name = parameter.key(); |
tmp/org.txm.core.tests/src/org/txm/core/tests/manual/ToolBoxTesterRCPApplication.java (revision 1395) | ||
---|---|---|
6 | 6 |
import org.eclipse.equinox.app.IApplication; |
7 | 7 |
import org.eclipse.equinox.app.IApplicationContext; |
8 | 8 |
import org.txm.Toolbox; |
9 |
import org.txm.index.core.functions.Lexicon;
|
|
9 |
import org.txm.searchengine.cqp.corpus.CQPLexicon;
|
|
10 | 10 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
11 | 11 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
12 | 12 |
import org.txm.searchengine.cqp.corpus.Property; |
... | ... | |
72 | 72 |
Property enpos = corpus.getProperty("enpos"); |
73 | 73 |
Property enlemma = corpus.getProperty("enlemma"); |
74 | 74 |
|
75 |
// Lexicon |
|
75 |
// CQP Lexicon
|
|
76 | 76 |
System.out.println("**************************************************"); |
77 | 77 |
System.out.println("** Lexicon tests"); |
78 | 78 |
System.out.println("**************************************************"); |
79 |
Lexicon lex1 = new Lexicon(corpus);
|
|
79 |
CQPLexicon lex1 = new CQPLexicon(corpus);
|
|
80 | 80 |
lex1.setProperty(word); |
81 |
lex1.compute(); |
|
81 |
lex1._compute();
|
|
82 | 82 |
System.out.println("Details: " + lex1.getDetails()); |
83 | 83 |
|
84 | 84 |
//FIXME: end of tests |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/importer.groovy (revision 1395) | ||
---|---|---|
148 | 148 |
//println "ptokenfiles "+ptokenfiles |
149 | 149 |
//TOKENIZE FILES |
150 | 150 |
List<File> tokenfiles; |
151 |
println "Tokenize=$tokenize sentence=$sentence" |
|
151 | 152 |
if (tokenize || sentence) { |
152 | 153 |
println "Tokenizing "+okfiles.size()+" files" |
153 | 154 |
for (File f : okfiles) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/xmlLoader.groovy (revision 1395) | ||
---|---|---|
69 | 69 |
int wordsPerPage = project.getEditionDefinition("default").getWordsPerPage() |
70 | 70 |
String page_element = project.getEditionDefinition("default").getPageElement() |
71 | 71 |
boolean build_edition = project.getEditionDefinition("default").getBuildEdition() |
72 |
boolean skipDoTokenizeStep = project.getDoTokenizerStep()
|
|
72 |
boolean doTokenizeStep = project.getDoTokenizerStep()
|
|
73 | 73 |
|
74 | 74 |
File srcDir = new File(rootDir); |
75 | 75 |
File binDir = project.getProjectDirectory() |
... | ... | |
185 | 185 |
def imp = new importer(); |
186 | 186 |
imp.doValidation(true) // change this to not validate xml |
187 | 187 |
|
188 |
imp.doTokenize(!skipDoTokenizeStep) // change this, to not tokenize xml
|
|
188 |
imp.doTokenize(doTokenizeStep) // change this, to not tokenize xml
|
|
189 | 189 |
imp.setStopIfMalformed(stopIfMalformed); |
190 | 190 |
if (!imp.run( srcDir, binDir, txmDir, basename, ignoredElements, lang)) { |
191 | 191 |
println "import process stopped"; |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/compiler.groovy (revision 1395) | ||
---|---|---|
253 | 253 |
// output.write(" "+name+"=\""+textmetadata.get(name)+"\"") |
254 | 254 |
for (int i = 0 ; i < parser.getAttributeCount() ; i++) { |
255 | 255 |
String attrname = parser.getAttributeLocalName(i); |
256 |
String attrvalue = parser.getAttributeValue(i) |
|
256 |
String attrvalue = parser.getAttributeValue(i).replaceAll("\"", "&quot;")
|
|
257 | 257 |
if (normalizeMetadata) |
258 | 258 |
attrvalue = attrvalue.toLowerCase(); |
259 | 259 |
if (attrname != "id") |
... | ... | |
319 | 319 |
String attrvalue = parser.getAttributeValue(i) |
320 | 320 |
if (normalizeMetadata) |
321 | 321 |
attrvalue = attrvalue.toLowerCase(); |
322 |
output.write(" "+attrname.toLowerCase()+"=\""+attrvalue+"\"") |
|
322 |
output.write(" "+attrname.toLowerCase()+"=\""+attrvalue.replaceAll("\"", "&quot;")+"\"")
|
|
323 | 323 |
} |
324 | 324 |
if (parser.getAttributeCount() == 0) { // add the n attribute |
325 | 325 |
if (!ncounts.containsKey(localname)) ncounts.put(localname, 0); |
... | ... | |
461 | 461 |
corpus.setDescription("Built with the XML/w import module"); |
462 | 462 |
|
463 | 463 |
File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
464 |
cqpFile.delete() |
|
465 |
|
|
464 | 466 |
new File(binDir,"cqp").mkdirs() |
465 | 467 |
new File(binDir,"data").mkdirs() |
466 | 468 |
new File(binDir,"registry").mkdirs() |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/compiler.groovy (revision 1395) | ||
---|---|---|
765 | 765 |
corpus.setDescription("Built with the TXT+CSV import module"); |
766 | 766 |
|
767 | 767 |
File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
768 |
cqpFile.delete() |
|
768 | 769 |
new File(binDir,"cqp").mkdirs() |
769 | 770 |
new File(binDir,"data").mkdirs() |
770 | 771 |
new File(binDir,"registry").mkdirs() |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/compiler.groovy (revision 1395) | ||
---|---|---|
193 | 193 |
corpus.setDescription("Built with the TXT+CSV import module"); |
194 | 194 |
|
195 | 195 |
File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
196 |
cqpFile.delete() |
|
196 | 197 |
new File(binDir,"cqp").mkdirs() |
197 | 198 |
new File(binDir,"data").mkdirs() |
198 | 199 |
new File(binDir,"registry").mkdirs() |
... | ... | |
209 | 210 |
//1- Transform into CQP file |
210 | 211 |
println("Compiling "+txmDir.listFiles().length+" files") |
211 | 212 |
XMLTXM2CQP cqpbuilder = null; |
213 |
cqpFile.delete() |
|
212 | 214 |
ArrayList<File> files = txmDir.listFiles(); |
213 | 215 |
Collections.sort(files); |
214 | 216 |
for (File txmfile : files) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/compiler.groovy (revision 1395) | ||
---|---|---|
188 | 188 |
corpus.setDescription("Build the alceste import module"); |
189 | 189 |
|
190 | 190 |
File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
191 |
cqpFile.delete() |
|
191 | 192 |
new File(binDir,"cqp").mkdirs() |
192 | 193 |
new File(binDir,"data").mkdirs() |
193 | 194 |
new File(binDir,"registry").mkdirs() |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/compiler.groovy (revision 1395) | ||
---|---|---|
200 | 200 |
corpus.setDescription("Built with the TXT+CSV import module"); |
201 | 201 |
|
202 | 202 |
File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
203 |
cqpFile.delete() |
|
203 | 204 |
new File(binDir,"cqp").mkdirs() |
204 | 205 |
new File(binDir,"data").mkdirs() |
205 | 206 |
new File(binDir,"registry").mkdirs() |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/cqp/compiler.groovy (revision 1395) | ||
---|---|---|
101 | 101 |
corpus.setDescription("Built with the TXT+CSV import module"); |
102 | 102 |
|
103 | 103 |
File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
104 |
cqpFile.delete() |
|
104 | 105 |
new File(binDir,"cqp").mkdirs() |
105 | 106 |
new File(binDir,"data").mkdirs() |
106 | 107 |
new File(binDir,"registry").mkdirs() |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZCompiler.groovy (revision 1395) | ||
---|---|---|
214 | 214 |
println " Word properties: "+pAttributes |
215 | 215 |
println " Structures: "+sargs |
216 | 216 |
File allcqpFile = new File(cqpDirectory, "all.cqp"); |
217 |
allcqpFile.delete() |
|
217 | 218 |
try { |
218 | 219 |
if (!CwbEncode.concat(cqpFiles, allcqpFile)) { |
219 | 220 |
println "Fail to write the master cqp file: "+allcqpFile |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImporter.groovy (revision 1395) | ||
---|---|---|
40 | 40 |
def xslParams; |
41 | 41 |
String lang; |
42 | 42 |
String wordTag; |
43 |
boolean skipDoTokenizeStep = false;
|
|
43 |
boolean doTokenizeStep = false;
|
|
44 | 44 |
|
45 | 45 |
Metadatas metadata = null; // text metadata |
46 | 46 |
|
... | ... | |
99 | 99 |
lang = project.getLang(); |
100 | 100 |
|
101 | 101 |
wordTag = project.getTokenizerWordElement() |
102 |
this.skipDoTokenizeStep = !project.getDoTokenizerStep()
|
|
102 |
this.doTokenizeStep = project.getDoTokenizerStep()
|
|
103 | 103 |
|
104 | 104 |
//prepare metadata if any |
105 | 105 |
File allMetadataFile = Metadatas.findMetadataFile(inputDirectory); |
... | ... | |
344 | 344 |
} |
345 | 345 |
|
346 | 346 |
//if (wordTag != "w") { |
347 |
if (skipDoTokenizeStep) {
|
|
347 |
if (doTokenizeStep) {
|
|
348 | 348 |
println "No tokenization do to." |
349 | 349 |
// ConsoleProgressBar cpb = new ConsoleProgressBar(filesToProcess.size()) |
350 | 350 |
for (File f : filesToProcess) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xmltxm/compiler.groovy (revision 1395) | ||
---|---|---|
364 | 364 |
corpus.setDescription("Built with the TXT+CSV import module"); |
365 | 365 |
|
366 | 366 |
File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
367 |
cqpFile.delete() |
|
367 | 368 |
new File(binDir,"cqp").mkdirs() |
368 | 369 |
new File(binDir,"data").mkdirs() |
369 | 370 |
new File(binDir,"registry").mkdirs() |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/discours/compiler.groovy (revision 1395) | ||
---|---|---|
188 | 188 |
corpus.setDescription("Built with the TXT+CSV import module"); |
189 | 189 |
|
190 | 190 |
File cqpFile = new File(binDir,"cqp/"+corpusname+".cqp"); |
191 |
cqpFile.delete() |
|
191 | 192 |
new File(binDir,"cqp").mkdirs() |
192 | 193 |
new File(binDir,"data").mkdirs() |
193 | 194 |
new File(binDir,"registry").mkdirs() |
tmp/org.txm.index.rcp/src/org/txm/index/rcp/handlers/ComputeLexicon.java (revision 1395) | ||
---|---|---|
6 | 6 |
import org.eclipse.core.commands.ExecutionEvent; |
7 | 7 |
import org.eclipse.core.commands.ExecutionException; |
8 | 8 |
import org.txm.index.core.functions.Index; |
9 |
import org.txm.index.rcp.editors.DictionnaryEditor; |
|
9 |
import org.txm.index.core.functions.Lexicon; |
|
10 |
import org.txm.index.rcp.editors.LexiconEditor; |
|
10 | 11 |
import org.txm.rcp.editors.TXMEditor; |
11 | 12 |
import org.txm.rcp.handlers.BaseAbstractHandler; |
12 | 13 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
13 | 14 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
14 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
|
15 | 15 |
|
16 | 16 |
|
17 | 17 |
/** |
... | ... | |
33 | 33 |
|
34 | 34 |
Object selection = this.getCorporaViewSelectedObject(event); |
35 | 35 |
|
36 |
Index index = null;
|
|
36 |
Lexicon index = null;
|
|
37 | 37 |
|
38 | 38 |
// New Lexicon from Corpus |
39 | 39 |
if (selection instanceof CQPCorpus) { |
40 | 40 |
CQPCorpus corpus = (CQPCorpus) selection; |
41 |
index = new Index(corpus); |
|
42 |
index.setLexiconMode(true); |
|
41 |
index = new Lexicon(corpus); |
|
43 | 42 |
try { |
44 |
index.setQuery(new CQLQuery("[]")); //$NON-NLS-1$ |
|
45 | 43 |
index.setProperties(Arrays.asList(corpus.getWordProperty())); |
46 |
index.setVisible(false); |
|
47 | 44 |
} catch (CqiClientException e) { |
48 | 45 |
// TODO Auto-generated catch block |
49 | 46 |
e.printStackTrace(); |
50 | 47 |
} |
51 | 48 |
} |
52 |
// // Reopening an existing Lexicon
|
|
53 |
// else if(selection instanceof Index) {
|
|
54 |
// index = (Index) selection;
|
|
55 |
// }
|
|
49 |
// Reopening an existing Lexicon |
|
50 |
else if(selection instanceof Lexicon) {
|
|
51 |
index = (Lexicon) selection;
|
|
52 |
} |
|
56 | 53 |
else { |
57 | 54 |
super.logCanNotExecuteCommand(selection); |
58 | 55 |
} |
... | ... | |
63 | 60 |
} |
64 | 61 |
|
65 | 62 |
public static void open(Index lexicon) { |
66 |
TXMEditor.openEditor(lexicon, DictionnaryEditor.class.getName());
|
|
63 |
TXMEditor.openEditor(lexicon, LexiconEditor.class.getName());
|
|
67 | 64 |
} |
68 | 65 |
} |
tmp/org.txm.index.rcp/src/org/txm/index/rcp/adapters/LexiconAdapterFactory.java (revision 1395) | ||
---|---|---|
29 | 29 |
return new TXMResultAdapter() { |
30 | 30 |
@Override |
31 | 31 |
public ImageDescriptor getImageDescriptor(Object object) { |
32 |
// TODO Auto-generated method stub |
|
33 | 32 |
return ICON; |
34 | 33 |
} |
35 | 34 |
}; |
tmp/org.txm.index.rcp/src/org/txm/index/rcp/adapters/IndexAdapterFactory.java (revision 1395) | ||
---|---|---|
6 | 6 |
import org.eclipse.ui.plugin.AbstractUIPlugin; |
7 | 7 |
import org.osgi.framework.FrameworkUtil; |
8 | 8 |
import org.txm.index.core.functions.Index; |
9 |
import org.txm.index.core.functions.Lexicon; |
|
10 | 9 |
import org.txm.rcp.adapters.TXMResultAdapter; |
11 | 10 |
import org.txm.rcp.adapters.TXMResultAdapterFactory; |
11 |
import org.txm.searchengine.cqp.corpus.CQPLexicon; |
|
12 | 12 |
|
13 | 13 |
|
14 | 14 |
/** |
tmp/org.txm.index.rcp/src/org/txm/index/rcp/editors/DictionnaryEditor.java (revision 1395) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
package org.txm.index.rcp.editors; |
|
3 |
|
|
4 |
import org.eclipse.swt.layout.GridData; |
|
5 |
|
|
6 |
/** |
|
7 |
* Extends the index editor to : |
|
8 |
* - set the number of property to 1 |
|
9 |
* - hide the query field |
|
10 |
* |
|
11 |
* @author mdecorde. |
|
12 |
*/ |
|
13 |
public class DictionnaryEditor extends IndexEditor { |
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
@Override |
|
18 |
public void _createPartControl() { |
|
19 |
super._createPartControl(); |
|
20 |
|
|
21 |
this.propertiesSelector.setMaxPropertyNumber(1); |
|
22 |
|
|
23 |
this.queryWidget.setText("[]"); //$NON-NLS-1$ |
|
24 |
this.queryWidget.setEnabled(false); |
|
25 |
this.queryWidget.setVisible(false); |
|
26 |
this.queryWidget.setSize(0, 0); |
|
27 |
this.queryWidget.setLayoutData(new GridData(0, 0)); |
|
28 |
//this.queryWidget.dispose(); |
|
29 |
this.queryLabel.setVisible(false); |
|
30 |
this.queryLabel.dispose(); |
|
31 |
|
|
32 |
// no query field -> no need to expand the zone |
|
33 |
getMainParametersComposite().setLayoutData(new GridData(GridData.FILL, GridData.FILL, false, false)); |
|
34 |
|
|
35 |
} |
|
36 |
|
|
37 |
} |
tmp/org.txm.index.rcp/src/org/txm/index/rcp/editors/LexiconEditor.java (revision 1395) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
package org.txm.index.rcp.editors; |
|
3 |
|
|
4 |
import org.eclipse.swt.layout.GridData; |
|
5 |
|
|
6 |
/** |
|
7 |
* Extends the index editor to: |
|
8 |
* - set the number of property to 1 |
|
9 |
* - hide the query field |
|
10 |
* |
|
11 |
* @author mdecorde. |
|
12 |
*/ |
|
13 |
public class LexiconEditor extends IndexEditor { |
|
14 |
|
|
15 |
@Override |
|
16 |
public void _createPartControl() { |
|
17 |
super._createPartControl(); |
|
18 |
|
|
19 |
this.propertiesSelector.setMaxPropertyNumber(1); |
|
20 |
|
|
21 |
this.queryWidget.setText("[]"); //$NON-NLS-1$ |
|
22 |
this.queryWidget.setEnabled(false); |
|
23 |
this.queryWidget.setVisible(false); |
|
24 |
this.queryWidget.setSize(0, 0); |
|
25 |
this.queryWidget.setLayoutData(new GridData(0, 0)); |
|
26 |
//this.queryWidget.dispose(); |
|
27 |
this.queryLabel.setVisible(false); |
|
28 |
this.queryLabel.dispose(); |
|
29 |
|
|
30 |
// no query field -> no need to expand the zone |
|
31 |
getMainParametersComposite().setLayoutData(new GridData(GridData.FILL, GridData.FILL, false, false)); |
|
32 |
} |
|
33 |
} |
|
0 | 34 |
tmp/org.txm.index.rcp/src/org/txm/index/rcp/editors/IndexEditor.java (revision 1395) | ||
---|---|---|
100 | 100 |
|
101 | 101 |
/** The l t info. */ |
102 | 102 |
protected Label lTInfo; |
103 |
// result |
|
103 |
|
|
104 | 104 |
/** The line table viewer. */ |
105 | 105 |
protected TableViewer viewer; |
106 | 106 |
|
... | ... | |
446 | 446 |
TXMEditor.initContextMenu(this.viewer.getTable(), this.getSite(), this.viewer); |
447 | 447 |
|
448 | 448 |
|
449 |
if (queryWidget != null && !queryWidget.isDisposed()) { |
|
450 |
queryWidget.setFocus(); |
|
451 |
} |
|
449 |
setFocus(); |
|
452 | 450 |
|
453 | 451 |
} |
454 | 452 |
|
... | ... | |
537 | 535 |
*/ |
538 | 536 |
@Override |
539 | 537 |
public void setFocus() { |
540 |
if (!this.queryWidget.isDisposed()) { |
|
538 |
if (this.queryWidget != null && !this.queryWidget.isDisposed()) {
|
|
541 | 539 |
this.queryWidget.setFocus(); |
542 | 540 |
} |
543 | 541 |
StatusLine.setMessage(IndexUIMessages.openingTheIndexResults); |
tmp/org.txm.index.rcp/plugin.xml (revision 1395) | ||
---|---|---|
175 | 175 |
name="%editor.name"> |
176 | 176 |
</editor> |
177 | 177 |
<editor |
178 |
class="org.txm.index.rcp.editors.DictionnaryEditor"
|
|
178 |
class="org.txm.index.rcp.editors.LexiconEditor"
|
|
179 | 179 |
default="false" |
180 | 180 |
icon="icons/lexicon.png" |
181 |
id="org.txm.index.rcp.editors.DictionnaryEditor"
|
|
181 |
id="org.txm.index.rcp.editors.LexiconEditor"
|
|
182 | 182 |
name="%editor.name.0"> |
183 | 183 |
</editor> |
184 | 184 |
<editor |
... | ... | |
236 | 236 |
categoryId="org.txm.rcp.category.txm" |
237 | 237 |
defaultHandler="org.txm.index.rcp.handlers.ComputeLexicon" |
238 | 238 |
id="org.txm.index.rcp.handlers.ComputeLexicon" |
239 |
name="%command.name.0"> |
|
239 |
name="%command.name.0" |
|
240 |
returnTypeId="org.txm.index.core.functions.Lexicon"> |
|
240 | 241 |
</command> |
241 | 242 |
|
242 | 243 |
<command |
... | ... | |
296 | 297 |
<with |
297 | 298 |
variable="activePart"> |
298 | 299 |
<instanceof |
299 |
value="org.txm.index.rcp.editors.DictionnaryEditor">
|
|
300 |
value="org.txm.index.rcp.editors.LexiconEditor">
|
|
300 | 301 |
</instanceof> |
301 | 302 |
</with> |
302 | 303 |
</definition> |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/CQPLexicon.java (revision 1395) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2016-09-19 10:31:00 +0200 (Mon, 19 Sep 2016) $ |
|
25 |
// $LastChangedRevision: 3298 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.searchengine.cqp.corpus; |
|
29 |
|
|
30 |
import java.io.File; |
|
31 |
import java.io.FileNotFoundException; |
|
32 |
import java.io.FileOutputStream; |
|
33 |
import java.io.IOException; |
|
34 |
import java.io.OutputStreamWriter; |
|
35 |
import java.io.UnsupportedEncodingException; |
|
36 |
import java.util.ArrayList; |
|
37 |
import java.util.Arrays; |
|
38 |
import java.util.List; |
|
39 |
import java.util.Map; |
|
40 |
|
|
41 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
42 |
import org.eclipse.osgi.util.NLS; |
|
43 |
import org.txm.Toolbox; |
|
44 |
import org.txm.core.preferences.TXMPreferences; |
|
45 |
import org.txm.core.results.Parameter; |
|
46 |
import org.txm.core.results.TXMParameters; |
|
47 |
import org.txm.core.results.TXMResult; |
|
48 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
49 |
import org.txm.searchengine.cqp.ICqiClient; |
|
50 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
51 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
52 |
//import org.txm.statsengine.core.StatException; |
|
53 |
//import org.txm.statsengine.core.data.Vector; |
|
54 |
//import org.txm.statsengine.r.core.data.VectorImpl; |
|
55 |
import org.txm.utils.logger.Log; |
|
56 |
|
|
57 |
// TODO should be put into stat.data package ? |
|
58 |
/** |
|
59 |
* Represent a frequency list according to a {@link CQPCorpus} (or a. |
|
60 |
* |
|
61 |
* {@link Subcorpus}) and a {@link Property}. |
|
62 |
* @author sloiseau |
|
63 |
*/ |
|
64 |
public class CQPLexicon { |
|
65 |
|
|
66 |
|
|
67 |
/** The forms. */ |
|
68 |
private String[] forms; |
|
69 |
|
|
70 |
/** The freqs. */ |
|
71 |
private int[] freqs; |
|
72 |
|
|
73 |
/** The ids. */ |
|
74 |
private int[] ids; |
|
75 |
|
|
76 |
/** The number of tokens. */ |
|
77 |
int numberOfTokens = -1; |
|
78 |
|
|
79 |
/** The symbol. */ |
|
80 |
private String symbol; |
|
81 |
|
|
82 |
/** The writer. */ |
|
83 |
private OutputStreamWriter writer; |
|
84 |
|
|
85 |
/** |
|
86 |
* The property. |
|
87 |
*/ |
|
88 |
protected Property pProperty; |
|
89 |
|
|
90 |
private CQPCorpus corpus; |
|
91 |
|
|
92 |
/** |
|
93 |
* @param corpus |
|
94 |
*/ |
|
95 |
public CQPLexicon(CQPCorpus corpus) { |
|
96 |
this.corpus = corpus; |
|
97 |
} |
|
98 |
|
|
99 |
public boolean _compute() throws Exception { |
|
100 |
if (this.getParent() instanceof MainCorpus) { |
|
101 |
return computeWithMainCorpus((MainCorpus)this.getParent(), pProperty, null); |
|
102 |
} |
|
103 |
else if (this.getParent() instanceof Subcorpus) { |
|
104 |
return computewithSubCorpus((Subcorpus)this.getParent(), pProperty, null); |
|
105 |
} |
|
106 |
else { |
|
107 |
Log.severe("Error: Lexicon parent is neither a Maincorpus nor a Subcorpus."); //$NON-NLS-1$ |
|
108 |
return false; |
|
109 |
} |
|
110 |
} |
|
111 |
|
|
112 |
/** |
|
113 |
* Gets the lexicon relative to a given property. |
|
114 |
* |
|
115 |
* @param property |
|
116 |
* the property |
|
117 |
* |
|
118 |
* @return the lexicon |
|
119 |
* |
|
120 |
* @throws CqiClientException |
|
121 |
* the cqi client exception |
|
122 |
*/ |
|
123 |
protected boolean computeWithMainCorpus(MainCorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException { |
|
124 |
// System.out.println("in "+this.getCqpId()+" look for cached lexicon "+property); |
|
125 |
// System.out.println("not found"); |
|
126 |
int lexiconSize; |
|
127 |
try { |
|
128 |
lexiconSize = CorpusManager.getCorpusManager().getCqiClient().lexiconSize(property.getQualifiedName()); |
|
129 |
} catch (Exception e) { |
|
130 |
throw new CqiClientException(e); |
|
131 |
} |
|
132 |
|
|
133 |
int[] ids = new int[lexiconSize]; |
|
134 |
for (int i = 0; i < ids.length; i++) { |
|
135 |
ids[i] = i; |
|
136 |
} |
|
137 |
|
|
138 |
int[] freqs; |
|
139 |
try { |
|
140 |
freqs = CorpusManager.getCorpusManager().getCqiClient().id2Freq(property.getQualifiedName(), ids); |
|
141 |
} catch (Exception e) { |
|
142 |
throw new CqiClientException(e); |
|
143 |
} |
|
144 |
|
|
145 |
this.init(corpus, property, freqs, ids); |
|
146 |
return true; |
|
147 |
} |
|
148 |
|
|
149 |
/** |
|
150 |
* |
|
151 |
* @param corpus |
|
152 |
* @param property |
|
153 |
* @param monitor |
|
154 |
* @return |
|
155 |
* @throws CqiClientException |
|
156 |
*/ |
|
157 |
// FIXME: why this method needs to create and delete some new subcorpus???? the computing can't be done directly on the corpus argument??? |
|
158 |
// eg. dist = CorpusManager.getCorpusManager().getCqiClient().fdist1(corpus.getQualifiedCqpId(), 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName()); |
|
159 |
protected boolean computewithSubCorpus(Subcorpus corpus, Property property, IProgressMonitor monitor) throws CqiClientException { |
|
160 |
|
|
161 |
//System.out.println("not found"); |
|
162 |
// Log.finest(NLS.bind(IndexCoreMessages.computingTheLexiconOfSubcorpusP0, corpus.getName())); |
|
163 |
//long start = System.currentTimeMillis(); |
|
164 |
int[][] fdist = null; |
|
165 |
AbstractCqiClient cqi = CorpusManager.getCorpusManager().getCqiClient(); |
|
166 |
String tmp = "TMP" + CQPCorpus.getNextSubcorpusCounter(); |
|
167 |
String qtmp = corpus.getMainCorpus().getQualifiedCqpId()+":"+tmp; |
|
168 |
try { |
|
169 |
System.out.println("subcorpus: "+corpus.getQualifiedCqpId()); |
|
170 |
System.out.println("query subcorpus: "+qtmp); |
|
171 |
cqi.cqpQuery(corpus.getQualifiedCqpId(), tmp, "[]"); |
|
172 |
fdist = CorpusManager.getCorpusManager().getCqiClient().fdist1(qtmp, 0, ICqiClient.CQI_CONST_FIELD_MATCH, property.getName()); |
|
173 |
//System.out.println("nb lines: "+fdist.length); |
|
174 |
} catch (Exception e) { |
|
175 |
throw new CqiClientException(e); |
|
176 |
} finally { |
|
177 |
try { |
|
178 |
cqi.dropSubCorpus(qtmp); |
|
179 |
} catch (IOException e) { |
|
180 |
// TODO Auto-generated catch block |
|
181 |
e.printStackTrace(); |
|
182 |
} catch (CqiServerError e) { |
|
183 |
// TODO Auto-generated catch block |
|
184 |
e.printStackTrace(); |
|
185 |
} |
|
186 |
} |
|
187 |
int lexiconSize = fdist.length; |
|
188 |
|
|
189 |
int[] freqs = new int[lexiconSize]; |
|
190 |
int[] ids = new int[lexiconSize]; |
|
191 |
for (int i = 0; i < fdist.length; i++) { |
|
192 |
ids[i] = fdist[i][0]; |
|
193 |
freqs[i] = fdist[i][1]; |
|
194 |
} |
|
195 |
|
|
196 |
init(corpus, property, freqs, ids); |
|
197 |
return true; |
|
198 |
} |
|
199 |
|
|
200 |
/** |
|
201 |
* Compute number of tokens. / this.nbr |
|
202 |
*/ |
|
203 |
private void computeNumberOfTokens() { |
|
204 |
numberOfTokens = 0; |
|
205 |
for (int i = 0; i < freqs.length; i++) { |
|
206 |
numberOfTokens += freqs[i]; |
|
207 |
// System.out.println(numberOfTokens); |
|
208 |
// if (freqs[i] != 1) System.out.println(freqs[i]); |
|
209 |
} |
|
210 |
} |
|
211 |
|
|
212 |
/** |
|
213 |
* Dump lexicon forms and frequencies in a String. |
|
214 |
* |
|
215 |
* @param col the col |
|
216 |
* @param txt the txt |
|
217 |
* @return the string |
|
218 |
*/ |
|
219 |
public String dump(String col, String txt) { |
|
220 |
StringBuffer buffer = new StringBuffer(); |
|
221 |
getForms(); |
|
222 |
for (int i = 0; i < forms.length; i++) { |
|
223 |
buffer.append(txt + forms[i].replace(txt, txt + txt) + txt + col + freqs[i] + "\n"); //$NON-NLS-1$ |
|
224 |
} |
|
225 |
return buffer.toString(); |
|
226 |
} |
|
227 |
|
|
228 |
/* (non-Javadoc) |
|
229 |
* @see java.lang.Object#equals(java.lang.Object) |
|
230 |
*/ |
|
231 |
@Override |
|
232 |
public boolean equals(Object obj) { |
|
233 |
if (!(obj instanceof CQPLexicon)) { |
|
234 |
return false; |
|
235 |
} |
|
236 |
CQPLexicon other = (CQPLexicon) obj; |
|
237 |
|
|
238 |
if (other.nbrOfType() != this.nbrOfType()) { |
|
239 |
return false; |
|
240 |
} |
|
241 |
return (Arrays.equals(freqs, other.getFreq()) && Arrays.equals(getForms(), other.getForms())); |
|
242 |
} |
|
243 |
|
|
244 |
/** |
|
245 |
* The corpus or subcorpus this lexicon is build on. |
|
246 |
* |
|
247 |
* @return the corpus |
|
248 |
*/ |
|
249 |
public CQPCorpus getCorpus() { |
|
250 |
return corpus; |
|
251 |
} |
|
252 |
|
|
253 |
|
|
254 |
public String getDetails() { |
|
255 |
return this.getParent().getName() + " " + this.pProperty.getName(); //$NON-NLS-1$ |
|
256 |
} |
|
257 |
|
|
258 |
//TODO: move this into a Lexicon chart renderer |
|
259 |
// /** |
|
260 |
// * Draw a pareto graphic with this frequency list and record it into the |
|
261 |
// * provided filename into svg format. |
|
262 |
// * |
|
263 |
// * @param file where to save the pareto graphic. |
|
264 |
// * @return the pareto graphic |
|
265 |
// * @throws StatException if anything goes wrong. |
|
266 |
// */ |
|
267 |
// public void getParetoGraphic(File file) throws StatException { |
|
268 |
// String rName = asVector().getSymbol(); |
|
269 |
// String expr = "pareto(" + rName + ")"; //$NON-NLS-1$ //$NON-NLS-2$ |
|
270 |
// try { |
|
271 |
// RWorkspace.getRWorkspaceInstance().plot(file, expr, RDevice.SVG); |
|
272 |
// } catch (Exception e) { |
|
273 |
// throw new StatException(e); |
|
274 |
// } |
|
275 |
// } |
|
276 |
|
|
277 |
/** |
|
278 |
* The dif ferent types in the lexicon, the type at the index <code>j</code> |
|
279 |
* of this array have the frequency at index <code>j</code> in the array |
|
280 |
* returned by {@link #getFreq()}. |
|
281 |
* |
|
282 |
* @return types as an array of <code>String</code> |
|
283 |
*/ |
|
284 |
public String[] getForms() { |
|
285 |
if (forms == null) { |
|
286 |
if(ids == null) { |
|
287 |
return new String[0]; |
|
288 |
} |
|
289 |
try { |
|
290 |
forms = CorpusManager.getCorpusManager().getCqiClient().id2Str(pProperty.getQualifiedName(), ids); |
|
291 |
} catch (Exception e) { |
|
292 |
// TODO Auto-generated catch block |
|
293 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
294 |
} |
|
295 |
} |
|
296 |
return forms; |
|
297 |
} |
|
298 |
|
|
299 |
/** |
|
300 |
* The dif ferent types in the lexicon, the type at the index <code>j</code> |
|
301 |
* of this array have the frequency at index <code>j</code> in the array |
|
302 |
* returned by {@link #getFreq()}. |
|
303 |
* |
|
304 |
* @param number the number |
|
305 |
* @return types as an array of <code>String</code> |
|
306 |
*/ |
|
307 |
public String[] getForms(int number) { |
|
308 |
//System.out.println("Lexicon("+this.property+" get forms. number="+number+", ids len="+ids.length); |
|
309 |
if (forms == null) { |
|
310 |
try { |
|
311 |
number = Math.min(number, ids.length); |
|
312 |
if (number <= 0) { |
|
313 |
return new String[0]; |
|
314 |
} |
|
315 |
int[] subpositions = new int[number]; |
|
316 |
System.arraycopy(ids, 0, subpositions, 0, number); |
|
317 |
return CorpusManager.getCorpusManager().getCqiClient().id2Str(pProperty.getQualifiedName(), subpositions); |
|
318 |
} catch (Exception e) { |
|
319 |
// TODO Auto-generated catch block |
|
320 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
321 |
return null; |
|
322 |
} |
|
323 |
} else { |
|
324 |
number = Math.min(number, ids.length); |
|
325 |
if (number <= 0) { |
|
326 |
return new String[0]; |
|
327 |
} |
|
328 |
String[] subforms = new String[number]; |
|
329 |
System.arraycopy(ids, 0, subforms, 0, number); |
|
330 |
return subforms; |
|
331 |
} |
|
332 |
} |
|
333 |
|
|
334 |
/** |
|
335 |
* The dif ferent frequencies in the lexicon. See {@link #getForms()}. |
|
336 |
* |
|
337 |
* @return frequencies as an array of <code>int</code> |
|
338 |
*/ |
|
339 |
public int[] getFreq() { |
|
340 |
return freqs; |
|
341 |
} |
|
342 |
|
|
343 |
/** |
|
344 |
* return the ids of the entries. |
|
345 |
* |
|
346 |
* @return types as an array of <code>String</code> |
|
347 |
*/ |
|
348 |
public int[] getIds() { |
|
349 |
return ids; |
|
350 |
} |
|
351 |
|
|
352 |
public String getName() { |
|
353 |
try { |
|
354 |
return this.corpus.getSimpleName() + ": " + this.getSimpleName(); |
|
355 |
} |
|
356 |
catch(Exception e) { |
|
357 |
} |
|
358 |
return ""; //$NON-NLS-1$ |
|
359 |
} |
|
360 |
|
|
361 |
/** |
|
362 |
* The property this lexicon is build on. |
|
363 |
* |
|
364 |
* @return the property |
|
365 |
*/ |
|
366 |
public Property getProperty() { |
|
367 |
return pProperty; |
|
368 |
} |
|
369 |
|
|
370 |
public String getSimpleName() { |
|
371 |
return this.getProperty().getName(); |
|
372 |
} |
|
373 |
|
|
374 |
/** |
|
375 |
* Gets the symbol. |
|
376 |
* |
|
377 |
* @return the symbol |
|
378 |
*/ |
|
379 |
public String getSymbol() { |
|
380 |
return this.symbol; |
|
381 |
} |
|
382 |
|
|
383 |
/** |
|
384 |
* Hack frequencies using a map to set forms and frequencies |
|
385 |
* |
|
386 |
* @param corpus the corpus |
|
387 |
* @param pProperty the property |
|
388 |
* @param map the map |
|
389 |
* {@link CQPCorpus#getLexicon(Property)} or |
|
390 |
* {@link Subcorpus#getLexicon(Property)}. |
|
391 |
*/ |
|
392 |
public boolean hack(Map<String, Integer> map) { |
|
393 |
if (map.size() != forms.length) { |
|
394 |
return false; |
|
395 |
} |
|
396 |
|
|
397 |
//super(corpus); |
|
398 |
int size = map.size(); |
|
399 |
int[] freqs = new int[size]; |
|
400 |
String[] forms = map.keySet().toArray(new String[] {}); |
|
401 |
for (int i = 0; i < forms.length; i++) { |
|
402 |
freqs[i] = map.get(forms[i]); |
|
403 |
} |
|
404 |
|
|
405 |
this.freqs = freqs; |
|
406 |
return true; |
|
407 |
} |
|
408 |
|
|
409 |
/** |
|
410 |
* Protected on purpose: should be accessed through others initializer. |
|
411 |
* |
|
412 |
* @param corpus the corpus |
|
413 |
* @param property the property |
|
414 |
* @param freq the freq |
|
415 |
* @param ids the ids |
|
416 |
* {@link CQPCorpus#getLexicon(Property)} or |
|
417 |
* {@link Subcorpus#getLexicon(Property)}. |
|
418 |
*/ |
|
419 |
protected void init(TXMResult corpus, Property property, int[] freq, int[] ids) { |
|
420 |
if (freq.length != ids.length) { |
|
421 |
throw new IllegalArgumentException("wrong size freq.length != ids.length "+freq.length+" != "+ids.length); |
|
422 |
} |
|
423 |
this.freqs = freq; |
|
424 |
this.ids = ids; |
|
425 |
this.forms = null; |
|
426 |
this.pProperty = property; |
|
427 |
} |
|
428 |
|
|
429 |
|
|
430 |
/** |
|
431 |
* Number of tokens (sum of all the frequencies) in the corpus. |
|
432 |
* |
|
433 |
* @return the size of the corpus or subcorpus. |
|
434 |
*/ |
|
435 |
public int nbrOfToken() { |
|
436 |
if (numberOfTokens <= 0) { |
|
437 |
computeNumberOfTokens(); |
|
438 |
} |
|
439 |
return numberOfTokens; |
|
440 |
} |
|
441 |
|
|
442 |
|
|
443 |
/** |
|
444 |
* Number of different types in the frequency list. |
|
445 |
* |
|
446 |
* @return number of types in the corpus or subcorpus. |
|
447 |
*/ |
|
448 |
public int nbrOfType() { |
|
449 |
try { |
|
450 |
return freqs.length; |
|
451 |
} |
|
452 |
catch (Exception e) { |
|
453 |
return 0; |
|
454 |
} |
|
455 |
} |
|
456 |
|
|
457 |
public void setParameters(Property property) { |
|
458 |
this.pProperty = property; |
|
459 |
} |
|
460 |
|
|
461 |
public boolean setParameters(TXMParameters parameters) { |
|
462 |
try { |
|
463 |
Property p = (Property) parameters.get("properties"); |
|
464 |
this.setParameters(p); |
|
465 |
} catch (Exception e) { |
|
466 |
Log.printStackTrace(e); |
|
467 |
return false; |
|
468 |
} |
|
469 |
return true; |
|
470 |
} |
|
471 |
|
|
472 |
/** |
|
473 |
* Sets the symbol. |
|
474 |
* |
|
475 |
* @param symbol the new symbol |
|
476 |
*/ |
|
477 |
public void setSymbol(String symbol) { |
|
478 |
this.symbol = symbol; |
|
479 |
} |
|
480 |
|
|
481 |
|
|
482 |
@Override |
|
483 |
public String toString() { |
|
484 |
return this.getName(); |
|
485 |
} |
|
486 |
|
|
487 |
/** |
|
488 |
* To txt. |
|
489 |
* |
|
490 |
* @param outfile the outfile |
|
491 |
* @param encoding the encoding |
|
492 |
* @param colseparator the colseparator |
|
493 |
* @param txtseparator the txtseparator |
|
494 |
* @return true, if successful |
|
495 |
*/ |
|
496 |
@Deprecated |
|
497 |
public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) { |
|
498 |
// NK: writer declared as class attribute to perform a clean if the operation is interrupted |
|
499 |
// OutputStreamWriter writer; |
|
500 |
try { |
|
501 |
this.writer = new OutputStreamWriter(new FileOutputStream(outfile), |
|
502 |
encoding); |
|
503 |
} catch (UnsupportedEncodingException e1) { |
|
504 |
org.txm.utils.logger.Log.printStackTrace(e1); |
|
505 |
return false; |
|
506 |
} catch (FileNotFoundException e1) { |
|
507 |
org.txm.utils.logger.Log.printStackTrace(e1); |
|
508 |
return false; |
|
509 |
} |
|
510 |
|
|
511 |
try { |
|
512 |
writer.write(this.dump(colseparator, txtseparator)); |
|
513 |
writer.close(); |
|
514 |
} catch (IOException e) { |
|
515 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
516 |
return false; |
|
517 |
} |
|
518 |
|
|
519 |
return true; |
|
520 |
} |
|
521 |
|
|
522 |
/** |
|
523 |
* Sets the unit property. |
|
524 |
* @param property the unit property |
|
525 |
*/ |
|
526 |
public void setProperty(Property property) { |
|
527 |
this.pProperty = property; |
|
528 |
} |
|
529 |
|
|
530 |
|
|
531 |
public static CQPLexicon getLexicon(CQPCorpus corpus, Property property, IProgressMonitor monitor, boolean visible) throws Exception { |
|
532 |
CQPLexicon lexicon = null; |
|
533 |
|
|
534 |
// recycling parent Lexicon if exists |
|
535 |
ArrayList<CQPLexicon> partLexicons = (ArrayList<CQPLexicon>) corpus.getObjectCacheForClass(CQPLexicon.class); |
|
536 |
for (int i = 0; i < partLexicons.size(); i++) { |
|
537 |
if(partLexicons.get(i).getProperty() == property) { |
|
538 |
lexicon = partLexicons.get(i); |
|
539 |
break; |
|
540 |
} |
|
541 |
} |
|
542 |
|
|
543 |
// creating new Lexicon |
|
544 |
if(lexicon == null || !lexicon.getProperty().getFullName().equals(property.getFullName())) { |
|
545 |
lexicon = new CQPLexicon(corpus); |
|
546 |
lexicon.setProperty(property); |
|
547 |
} |
|
548 |
System.out.println("Lexicon="+lexicon+" "+lexicon.hashCode()); |
|
549 |
return lexicon; |
|
550 |
} |
|
551 |
|
|
552 |
|
|
553 |
/** |
|
554 |
* Gets a Lexicon from the specified corpus. |
|
555 |
* If a Lexicon child exists in the Corpus from the specified property, returns it otherwise creates and computes a new Lexicon. |
|
556 |
* @param corpus |
|
557 |
* @param property |
|
558 |
* @return |
|
559 |
* @throws Exception |
|
560 |
*/ |
|
561 |
public static CQPLexicon getLexicon(CQPCorpus corpus, Property property, IProgressMonitor monitor) throws Exception { |
|
562 |
return getLexicon(corpus, property, monitor, true); |
|
563 |
} |
|
564 |
|
|
565 |
|
|
566 |
// /** |
|
567 |
// * Find or build a lexicon given a Corpus (MainCorpus or SubCorpus). |
|
568 |
// * |
|
569 |
// * @param corpus |
|
570 |
// * @param property |
|
571 |
// * @return a Lexicon. May return null if the lexicon forms or freqs are null. |
|
572 |
// * @throws Exception |
|
573 |
// */ |
|
574 |
// public static Lexicon getLexicon(Corpus corpus, Property property) throws Exception { |
|
575 |
// HashSet<Object> results = corpus.getStoredData(Lexicon.class); |
|
576 |
// for (Object result : results) { |
|
577 |
// Lexicon lex = (Lexicon)result; |
|
578 |
// if (lex.getProperty().equals(property)) { |
|
579 |
// return lex; |
|
580 |
// } |
|
581 |
// } |
|
582 |
// |
|
583 |
// Lexicon lex = new Lexicon(corpus); |
|
584 |
// lex.setParameters(property); |
|
585 |
// if (lex.compute(null) && lex.getForms() != null && lex.getFreq() != null) { |
|
586 |
// corpus.storeData(lex); |
|
587 |
// return lex; |
|
588 |
// } else { |
|
589 |
// return null; |
|
590 |
// } |
|
591 |
// } |
|
592 |
|
|
593 |
public CQPCorpus getParent() { |
|
594 |
return corpus; |
|
595 |
|
|
596 |
} |
|
597 |
} |
|
0 | 598 |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/CQPCorpus.java (revision 1395) | ||
---|---|---|
89 | 89 |
public CQPCorpus(Partition partition) { |
90 | 90 |
super(partition); |
91 | 91 |
} |
92 |
|
|
93 |
/** |
|
94 |
* Empty the corpus cache |
|
95 |
*/ |
|
96 |
@Override |
|
97 |
public void clean() { |
|
98 |
if (cache != null) { |
|
99 |
cache.clear(); |
|
100 |
} |
|
101 |
} |
|
92 | 102 |
|
93 | 103 |
/** |
94 | 104 |
* |
... | ... | |
1173 | 1183 |
} |
1174 | 1184 |
return output; |
1175 | 1185 |
} |
1186 |
|
|
1187 |
|
|
1188 |
protected static HashMap<Class<?>, ArrayList<?>> cache = new HashMap<Class<?>, ArrayList<?>>(); |
|
1189 |
private static final int CACHE_MAXIMUM_SIZE = 5; |
|
1190 |
|
|
1191 |
/** |
|
1192 |
* retrieve all cached objects for an object |
|
1193 |
* @param forObject |
|
1194 |
* @param c |
|
1195 |
* @return |
|
1196 |
*/ |
|
1197 |
public static HashMap<Class<?>, ArrayList<?>> getObjectCache() { |
|
1198 |
return cache; |
|
1199 |
} |
|
1200 |
|
|
1201 |
/** |
|
1202 |
* retrieve all cached objects of a certain class for an object |
|
1203 |
* @param forObject |
|
1204 |
* @param c |
|
1205 |
* @return |
|
1206 |
*/ |
|
1207 |
public static ArrayList<?> getObjectCacheForClass(Class<?> c) { |
|
1208 |
HashMap<Class<?>, ArrayList<?>> oCache = getObjectCache(); |
|
1209 |
if (!oCache.containsKey(c)) { |
|
1210 |
oCache.put(c, new ArrayList<Object>()); |
|
1211 |
} |
|
1212 |
return oCache.get(c); |
|
1213 |
} |
|
1214 |
|
|
1215 |
/** |
|
1216 |
* store an object for a maximum of 5 object cached per object |
|
1217 |
* @param forObject |
|
1218 |
* @param toCache |
|
1219 |
* @return |
|
1220 |
*/ |
|
1221 |
public static Object cache(Object toCache) { |
|
1222 |
if (toCache == null) return null; |
|
1223 |
|
|
1224 |
ArrayList<?> ocCache = getObjectCacheForClass(toCache.getClass()); |
|
1225 |
if (ocCache.size() > CACHE_MAXIMUM_SIZE) { |
|
1226 |
ocCache.remove(0); // oldest value |
|
1227 |
} |
|
1228 |
return toCache; |
|
1229 |
} |
|
1230 |
|
|
1231 |
/** |
|
1232 |
* remove a cached object |
|
1233 |
* |
|
1234 |
* @param forObject |
|
1235 |
* @param toCache |
|
1236 |
* @return |
|
1237 |
*/ |
|
1238 |
public static boolean uncache(Object toUnCache) { |
|
1239 |
if (toUnCache == null) return false; |
|
1240 |
|
|
1241 |
ArrayList<?> ocCache = getObjectCacheForClass(toUnCache.getClass()); |
|
1242 |
return ocCache.remove(toUnCache); |
|
1243 |
} |
|
1244 |
|
|
1245 |
|
|
1246 |
/** |
|
1247 |
* remove a cached object |
|
1248 |
* |
|
1249 |
* @param forObject |
|
1250 |
* @param toCache |
|
1251 |
* @return |
|
1252 |
*/ |
|
1253 |
public static ArrayList<?> uncache(Class classToUnCache) { |
|
1254 |
if (classToUnCache == null) return null; |
|
1255 |
|
|
1256 |
return cache.remove(classToUnCache); |
|
1257 |
} |
|
1176 | 1258 |
} |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/MainCorpus.java (revision 1395) | ||
---|---|---|
573 | 573 |
|
574 | 574 |
@Override |
575 | 575 |
public void clean() { |
576 |
|
|
576 |
super.clean(); |
|
577 |
|
|
577 | 578 |
try { |
578 | 579 |
if (CQPSearchEngine.isInitialized()) { |
579 | 580 |
CQPSearchEngine.getCqiClient().dropCorpus(getID()); |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/Subcorpus.java (revision 1395) | ||
---|---|---|
502 | 502 |
|
503 | 503 |
@Override |
504 | 504 |
public void clean() { |
505 |
super.clean(); |
|
505 | 506 |
try { |
506 | 507 |
AbstractCqiClient CQI = CorpusManager.getCorpusManager().getCqiClient(); |
507 | 508 |
if (CQPSearchEngine.isInitialized()) { |
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/functions/LexicalTable.java (revision 1395) | ||
---|---|---|
12 | 12 |
import org.txm.core.results.Parameter; |
13 | 13 |
import org.txm.core.results.TXMParameters; |
14 | 14 |
import org.txm.core.results.TXMResult; |
15 |
import org.txm.index.core.functions.Lexicon; |
|
16 | 15 |
import org.txm.index.core.functions.Line; |
17 | 16 |
import org.txm.index.core.functions.PartitionIndex; |
18 | 17 |
import org.txm.lexicaltable.core.messages.LexicalTableCoreMessages; |
... | ... | |
21 | 20 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl; |
22 | 21 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
23 | 22 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
23 |
import org.txm.searchengine.cqp.corpus.CQPLexicon; |
|
24 | 24 |
import org.txm.searchengine.cqp.corpus.Partition; |
25 | 25 |
import org.txm.searchengine.cqp.corpus.Property; |
26 | 26 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
... | ... | |
206 | 206 |
protected void _computeFromPartition(Partition partition) throws Exception { |
207 | 207 |
|
208 | 208 |
// parts lexicons |
209 |
List<Lexicon> partsLexicons = new ArrayList<Lexicon>();
|
|
209 |
List<CQPLexicon> partsLexicons = new ArrayList<CQPLexicon>();
|
|
210 | 210 |
for (int i = 0; i < partition.getPartsCount(); i++) { |
211 |
partsLexicons.add(Lexicon.getLexicon(partition.getParts().get(i), this.property, this.monitor, false)); |
|
211 |
partsLexicons.add(CQPLexicon.getLexicon(partition.getParts().get(i), this.property, this.monitor, false));
|
|
212 | 212 |
} |
213 | 213 |
|
214 | 214 |
// Corpus global lexicon |
215 |
Lexicon corpusLexicon = Lexicon.getLexicon(partition.getParent(), this.property, this.monitor, false);
|
|
215 |
CQPLexicon corpusLexicon = CQPLexicon.getLexicon(partition.getParent(), this.property, this.monitor, false);
|
|
216 | 216 |
|
217 | 217 |
ArrayList<String> filteredForms = new ArrayList<String>(); |
218 | 218 |
//create a copy and filter line with Fmin; |
... | ... | |
231 | 231 |
|
232 | 232 |
Integer id = null; |
233 | 233 |
for (int i = 0; i < partsLexicons.size(); i++) { |
234 |
Lexicon l = partsLexicons.get(i); |
|
234 |
CQPLexicon l = partsLexicons.get(i);
|
|
235 | 235 |
String[] ents = l.getForms(); |
236 | 236 |
int[] freqs = l.getFreq(); |
237 | 237 |
for (int j = 0; j < freqs.length; j++) { |
... | ... | |
354 | 354 |
|
355 | 355 |
Subcorpus subcorpus = (Subcorpus) this.parent; |
356 | 356 |
CQPCorpus parentCorpus = subcorpus.getMainCorpus(); |
357 |
Lexicon l1 = Lexicon.getLexicon(parentCorpus, property, monitor);
|
|
358 |
Lexicon l2 = Lexicon.getLexicon(subcorpus, property, monitor);
|
|
357 |
CQPLexicon l1 = CQPLexicon.getLexicon(parentCorpus, property, monitor, false);
|
|
358 |
CQPLexicon l2 = CQPLexicon.getLexicon(subcorpus, property, monitor, false);
|
|
359 | 359 |
|
360 | 360 |
this.statsData = new LexicalTableImpl(getNextName(), l1, l2); |
361 | 361 |
} |
tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/statsengine/r/data/LexicalTableImpl.java (revision 1395) | ||
---|---|---|
37 | 37 |
import java.util.List; |
38 | 38 |
|
39 | 39 |
import org.rosuda.REngine.REXPMismatchException; |
40 |
import org.txm.index.core.functions.Lexicon; |
|
41 | 40 |
import org.txm.lexicaltable.core.messages.LexicalTableCoreMessages; |
42 | 41 |
import org.txm.lexicaltable.core.statsengine.data.ILexicalTable; |
42 |
import org.txm.searchengine.cqp.corpus.CQPLexicon; |
|
43 | 43 |
import org.txm.searchengine.cqp.corpus.Property; |
44 | 44 |
import org.txm.statsengine.core.StatException; |
45 | 45 |
import org.txm.statsengine.core.data.Vector; |
46 | 46 |
import org.txm.statsengine.r.core.RWorkspace; |
47 | 47 |
import org.txm.statsengine.r.core.data.ContingencyTableImpl; |
48 |
import org.txm.statsengine.r.core.data.VectorImpl; |
|
48 | 49 |
import org.txm.statsengine.r.core.exceptions.RException; |
49 | 50 |
import org.txm.statsengine.r.core.exceptions.RWorkspaceException; |
50 | 51 |
|
... | ... | |
125 | 126 |
super(symbol); |
126 | 127 |
} |
127 | 128 |
|
129 |
/** The nolex. */ |
|
130 |
protected static int nolex = 1; |
|
131 |
|
|
132 |
/** The prefix r. */ |
|
133 |
protected static String prefixR = "Lexicon_"; //$NON-NLS-1$ |
|
128 | 134 |
/** |
135 |
* Convert the Lexicon into a Vector object. |
|
136 |
* TODO move this code somewhere |
|
137 |
* @return the vector |
|
138 |
* @throws StatException the stat exception |
|
139 |
*/ |
|
140 |
public static Vector asVector(CQPLexicon lex) throws StatException { |
|
141 |
String symbol = prefixR + (nolex++); |
|
142 |
VectorImpl v = new VectorImpl(lex.getFreq(), symbol); |
|
143 |
v.setRNames(lex.getForms()); |
|
144 |
lex.setSymbol(v.getSymbol()); |
|
145 |
return v; |
|
146 |
} |
|
147 |
|
|
148 |
|
|
149 |
/** |
|
129 | 150 |
* |
130 | 151 |
* @param symbol |
131 | 152 |
* @param corpusLexicon |
132 | 153 |
* @param subcorpusLexicon |
133 | 154 |
* @throws StatException |
134 | 155 |
*/ |
135 |
public LexicalTableImpl(String symbol, Lexicon corpusLexicon, Lexicon subcorpusLexicon) throws StatException {
|
|
156 |
public LexicalTableImpl(String symbol, CQPLexicon corpusLexicon, CQPLexicon subcorpusLexicon) throws StatException {
|
|
136 | 157 |
super(symbol); |
137 | 158 |
RWorkspace rw = RWorkspace.getRWorkspaceInstance(); |
138 |
Vector corpusLexiconV = corpusLexicon.asVector();
|
|
139 |
Vector subcorpusLexiconV = subcorpusLexicon.asVector();
|
|
159 |
Vector corpusLexiconV = asVector(corpusLexicon);
|
|
160 |
Vector subcorpusLexiconV = asVector(subcorpusLexicon);
|
|
140 | 161 |
|
141 | 162 |
//TODO: implement the R function : rw.callFunction("lexicons2LexicalTable", new QuantitativeDataStructure[] { corpusLexiconV, subcorpusLexiconV }, symbol); //$NON-NLS-1$ |
142 | 163 |
|
tmp/org.txm.rcp/src/main/java/org/txm/rcp/views/corpora/CorporaView.java (revision 1395) | ||
---|---|---|
172 | 172 |
|
173 | 173 |
Workspace w = Toolbox.workspace; |
174 | 174 |
if (w == null) return; |
175 |
treeViewer.setInput(w); treeViewer.refresh(); |
|
175 |
treeViewer.setInput(w); |
|
176 |
treeViewer.refresh(); |
|
176 | 177 |
} |
Formats disponibles : Unified diff