Révision 468
tmp/org.txm.cql2lsa.rcp/plugin.xml (revision 468) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<?eclipse version="3.4"?> |
|
3 |
<plugin> |
|
4 |
|
|
5 |
<extension |
|
6 |
point="org.eclipse.ui.commands"> |
|
7 |
<command |
|
8 |
categoryId="org.txm.rcpapplication.category.txm" |
|
9 |
defaultHandler="org.txm.rcpapplication.commands.function.ComputeExpI" |
|
10 |
id="org.txm.rcpapplication.commands.function.ComputeQueryIndexOfTexts" |
|
11 |
name="QueryIndexOfTexts"> |
|
12 |
</command> |
|
13 |
<command |
|
14 |
categoryId="org.txm.rcpapplication.category.txm" |
|
15 |
defaultHandler="org.txm.rcpapplication.commands.function.ComputeValuesOfQuery" |
|
16 |
id="org.txm.rcpapplication.commands.function.ComputeValuesOfQuery" |
|
17 |
name="ValuesOfQuery"> |
|
18 |
</command> |
|
19 |
<command |
|
20 |
categoryId="org.txm.rcpapplication.category.txm" |
|
21 |
defaultHandler="org.txm.rcpapplication.commands.function.ComputeExpII" |
|
22 |
id="org.txm.rcpapplication.commands.function.ComputeExpIIA" |
|
23 |
name="ExpIIA"> |
|
24 |
</command> |
|
25 |
<command |
|
26 |
categoryId="org.txm.rcpapplication.category.txm" |
|
27 |
defaultHandler="org.txm.rcpapplication.commands.function.CreateContextSubcorpus" |
|
28 |
id="org.txm.rcpapplication.commands.function.CreateQueriesContextSubcorpus" |
|
29 |
name="ContextSubcorpus"> |
|
30 |
</command> |
|
31 |
</extension> |
|
32 |
|
|
33 |
<extension |
|
34 |
point="org.eclipse.ui.menus"> |
|
35 |
<menuContribution |
|
36 |
allPopups="false" |
|
37 |
locationURI="toolbar:org.txm.rcpapplication.toolbartools?before=org.txm.rcpapplication.separator1"> |
|
38 |
<command |
|
39 |
commandId="org.txm.rcpapplication.commands.function.ComputeQueryIndexOfTexts" |
|
40 |
label="Exp I" |
|
41 |
style="push"> |
|
42 |
<visibleWhen |
|
43 |
checkEnabled="false"> |
|
44 |
<or> |
|
45 |
<reference |
|
46 |
definitionId="OneCorpusSelected"> |
|
47 |
</reference> |
|
48 |
</or> |
|
49 |
</visibleWhen> |
|
50 |
</command> |
|
51 |
<command |
|
52 |
commandId="org.txm.rcpapplication.commands.function.ComputeValuesOfQuery" |
|
53 |
label="ValuesOfQuery" |
|
54 |
style="push"> |
|
55 |
<visibleWhen |
|
56 |
checkEnabled="false"> |
|
57 |
<or> |
|
58 |
<reference |
|
59 |
definitionId="OneCorpusSelected"> |
|
60 |
</reference> |
|
61 |
</or> |
|
62 |
</visibleWhen> |
|
63 |
</command> |
|
64 |
<command |
|
65 |
commandId="org.txm.rcpapplication.commands.function.CreateQueriesContextSubcorpus" |
|
66 |
label="ContextSubcorpus" |
|
67 |
style="push"> |
|
68 |
<visibleWhen |
|
69 |
checkEnabled="false"> |
|
70 |
<or> |
|
71 |
<reference |
|
72 |
definitionId="OneCorpusSelected"> |
|
73 |
</reference> |
|
74 |
</or> |
|
75 |
</visibleWhen> |
|
76 |
</command> |
|
77 |
<command |
|
78 |
commandId="org.txm.rcpapplication.commands.function.ComputeExpIIA" |
|
79 |
label="Exp II" |
|
80 |
style="push"> |
|
81 |
<visibleWhen |
|
82 |
checkEnabled="false"> |
|
83 |
<or> |
|
84 |
<reference |
|
85 |
definitionId="OneCorpusSelected"> |
|
86 |
</reference> |
|
87 |
</or> |
|
88 |
</visibleWhen> |
|
89 |
</command> |
|
90 |
</menuContribution> |
|
91 |
<menuContribution |
|
92 |
allPopups="false" |
|
93 |
locationURI="popup:org.txm.rcpapplication.views.CorporaView"> |
|
94 |
<command |
|
95 |
commandId="org.txm.rcpapplication.commands.function.ComputeQueryIndexOfTexts" |
|
96 |
icon="icons/functions/QueryIndexOfTexts.png" |
|
97 |
style="push"> |
|
98 |
<visibleWhen |
|
99 |
checkEnabled="false"> |
|
100 |
<or> |
|
101 |
<reference |
|
102 |
definitionId="OneCorpusSelected"> |
|
103 |
</reference> |
|
104 |
</or> |
|
105 |
</visibleWhen> |
|
106 |
</command> |
|
107 |
</menuContribution> |
|
108 |
<menuContribution |
|
109 |
locationURI="menu:menu.tools"> |
|
110 |
<command |
|
111 |
commandId="org.txm.rcpapplication.commands.function.ComputeQueryIndexOfTexts" |
|
112 |
icon="icons/functions/QueryIndexOfTexts.png" |
|
113 |
style="push"> |
|
114 |
<visibleWhen |
|
115 |
checkEnabled="false"> |
|
116 |
<or> |
|
117 |
<reference |
|
118 |
definitionId="OneCorpusSelected"> |
|
119 |
</reference> |
|
120 |
</or> |
|
121 |
</visibleWhen> |
|
122 |
</command> |
|
123 |
</menuContribution> |
|
124 |
<menuContribution |
|
125 |
locationURI="menu:menu.help.plugins"> |
|
126 |
<command |
|
127 |
commandId="org.txm.rcpapplication.commands.OpenBrowser" |
|
128 |
label="CQL2LSA" |
|
129 |
style="push"> |
|
130 |
<parameter |
|
131 |
name="org.txm.rcpapplication.commands.commandParameter2" |
|
132 |
value="https://groupes.renater.fr/wiki/txm-users/public/extensions#CQL2LSA"> |
|
133 |
</parameter> |
|
134 |
</command> |
|
135 |
</menuContribution> |
|
136 |
</extension> |
|
137 |
</plugin> |
|
0 | 138 |
tmp/org.txm.cql2lsa.rcp/.classpath (revision 468) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<classpath> |
|
3 |
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/> |
|
4 |
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"> |
|
5 |
<accessrules> |
|
6 |
<accessrule kind="accessible" pattern="**"/> |
|
7 |
</accessrules> |
|
8 |
</classpathentry> |
|
9 |
<classpathentry kind="src" path="src"/> |
|
10 |
<classpathentry kind="output" path="bin"/> |
|
11 |
</classpath> |
|
0 | 12 |
tmp/org.txm.cql2lsa.rcp/META-INF/MANIFEST.MF (revision 468) | ||
---|---|---|
1 |
Manifest-Version: 1.0 |
|
2 |
Bundle-ManifestVersion: 2 |
|
3 |
Bundle-Name: CQL2LSA |
|
4 |
Bundle-SymbolicName: CQL2LSA;singleton:=true |
|
5 |
Bundle-Version: 1.0.0.qualifier |
|
6 |
Bundle-Activator: cql2lsarcp.Activator |
|
7 |
Require-Bundle: org.txm.core;bundle-version="0.7.0", |
|
8 |
org.txm.rcp;bundle-version="0.7.8", |
|
9 |
org.eclipse.ui, |
|
10 |
org.eclipse.core.runtime, |
|
11 |
org.eclipse.ui.editors;bundle-version="3.8.100", |
|
12 |
org.eclipse.core.expressions;bundle-version="3.4.500", |
|
13 |
org.txm.index.core, |
|
14 |
org.txm.lexicaltable.core, |
|
15 |
org.txm.statsengine.r.core |
|
16 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.6 |
|
17 |
Bundle-ActivationPolicy: lazy |
|
18 |
Export-Package: cql2lsarcp, |
|
19 |
org.txm.functions.cql2lsa, |
|
20 |
org.txm.rcpapplication.commands.function |
|
21 |
Bundle-Vendor: Textometrie.org |
|
0 | 22 |
tmp/org.txm.cql2lsa.rcp/.project (revision 468) | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<projectDescription> |
|
3 |
<name>CQL2LSARCP</name> |
|
4 |
<comment></comment> |
|
5 |
<projects> |
|
6 |
</projects> |
|
7 |
<buildSpec> |
|
8 |
<buildCommand> |
|
9 |
<name>org.eclipse.jdt.core.javabuilder</name> |
|
10 |
<arguments> |
|
11 |
</arguments> |
|
12 |
</buildCommand> |
|
13 |
<buildCommand> |
|
14 |
<name>org.eclipse.pde.ManifestBuilder</name> |
|
15 |
<arguments> |
|
16 |
</arguments> |
|
17 |
</buildCommand> |
|
18 |
<buildCommand> |
|
19 |
<name>org.eclipse.pde.SchemaBuilder</name> |
|
20 |
<arguments> |
|
21 |
</arguments> |
|
22 |
</buildCommand> |
|
23 |
</buildSpec> |
|
24 |
<natures> |
|
25 |
<nature>org.eclipse.pde.PluginNature</nature> |
|
26 |
<nature>org.eclipse.jdt.core.javanature</nature> |
|
27 |
</natures> |
|
28 |
</projectDescription> |
|
0 | 29 |
tmp/org.txm.cql2lsa.rcp/src/cql2lsarcp/Activator.java (revision 468) | ||
---|---|---|
1 |
package cql2lsarcp; |
|
2 |
|
|
3 |
import org.eclipse.jface.resource.ImageDescriptor; |
|
4 |
import org.eclipse.ui.plugin.AbstractUIPlugin; |
|
5 |
import org.osgi.framework.BundleContext; |
|
6 |
|
|
7 |
/** |
|
8 |
* The activator class controls the plug-in life cycle |
|
9 |
*/ |
|
10 |
public class Activator extends AbstractUIPlugin { |
|
11 |
|
|
12 |
// The plug-in ID |
|
13 |
public static final String PLUGIN_ID = "CQL2LSA"; //$NON-NLS-1$ |
|
14 |
|
|
15 |
// The shared instance |
|
16 |
private static Activator plugin; |
|
17 |
|
|
18 |
/** |
|
19 |
* The constructor |
|
20 |
*/ |
|
21 |
public Activator() { |
|
22 |
} |
|
23 |
|
|
24 |
/* |
|
25 |
* (non-Javadoc) |
|
26 |
* @see org.eclipse.ui.plugin.AbstractUIPlugin#start(org.osgi.framework.BundleContext) |
|
27 |
*/ |
|
28 |
public void start(BundleContext context) throws Exception { |
|
29 |
super.start(context); |
|
30 |
plugin = this; |
|
31 |
} |
|
32 |
|
|
33 |
/* |
|
34 |
* (non-Javadoc) |
|
35 |
* @see org.eclipse.ui.plugin.AbstractUIPlugin#stop(org.osgi.framework.BundleContext) |
|
36 |
*/ |
|
37 |
public void stop(BundleContext context) throws Exception { |
|
38 |
plugin = null; |
|
39 |
super.stop(context); |
|
40 |
} |
|
41 |
|
|
42 |
/** |
|
43 |
* Returns the shared instance |
|
44 |
* |
|
45 |
* @return the shared instance |
|
46 |
*/ |
|
47 |
public static Activator getDefault() { |
|
48 |
return plugin; |
|
49 |
} |
|
50 |
|
|
51 |
/** |
|
52 |
* Returns an image descriptor for the image file at the given |
|
53 |
* plug-in relative path |
|
54 |
* |
|
55 |
* @param path the path |
|
56 |
* @return the image descriptor |
|
57 |
*/ |
|
58 |
public static ImageDescriptor getImageDescriptor(String path) { |
|
59 |
return imageDescriptorFromPlugin(PLUGIN_ID, path); |
|
60 |
} |
|
61 |
} |
|
0 | 62 |
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ContextSubcorpus.java (revision 468) | ||
---|---|---|
1 |
package org.txm.functions.cql2lsa; |
|
2 |
|
|
3 |
import java.io.BufferedReader; |
|
4 |
import java.io.File; |
|
5 |
import java.io.FileInputStream; |
|
6 |
import java.io.IOException; |
|
7 |
import java.io.InputStreamReader; |
|
8 |
import java.util.ArrayList; |
|
9 |
import java.util.HashMap; |
|
10 |
|
|
11 |
import org.txm.core.results.TXMResult; |
|
12 |
import org.txm.functions.ProgressWatcher; |
|
13 |
import org.txm.functions.TXMCommand; |
|
14 |
import org.txm.searchengine.cqp.CQPEngine; |
|
15 |
import org.txm.searchengine.cqp.MemCqiClient; |
|
16 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
17 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
|
18 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
19 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
|
20 |
import org.txm.searchengine.cqp.corpus.Subcorpus; |
|
21 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
22 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
23 |
|
|
24 |
public class ContextSubcorpus extends TXMCommand { |
|
25 |
|
|
26 |
protected ArrayList<QueryResult> results = new ArrayList<QueryResult>(); |
|
27 |
protected HashMap<String, String> keywordQueriestoName = new HashMap<String, String>(); |
|
28 |
|
|
29 |
protected Corpus corpus; |
|
30 |
protected File keywordsFile; |
|
31 |
protected int contextSize; |
|
32 |
|
|
33 |
public ContextSubcorpus(Corpus corpus, File keywordsFile, int contextSize) { |
|
34 |
this.corpus = corpus; |
|
35 |
this.keywordsFile = keywordsFile; |
|
36 |
this.contextSize = contextSize; |
|
37 |
} |
|
38 |
|
|
39 |
@Override |
|
40 |
public boolean toTxt(File outfile, String encoding, String colseparator, |
|
41 |
String txtseparator) throws Exception { |
|
42 |
return false; |
|
43 |
} |
|
44 |
|
|
45 |
@Override |
|
46 |
public void clean() { |
|
47 |
|
|
48 |
} |
|
49 |
|
|
50 |
@Override |
|
51 |
public boolean delete() { |
|
52 |
return false; |
|
53 |
} |
|
54 |
|
|
55 |
@Override |
|
56 |
public TXMResult getParent() { |
|
57 |
return corpus; |
|
58 |
} |
|
59 |
|
|
60 |
public Subcorpus getSubcorpus() throws InvalidCqpIdException { |
|
61 |
if (results.size() > 0) { |
|
62 |
String name = keywordsFile.getName(); |
|
63 |
int idx = name.indexOf("."); |
|
64 |
if (idx > 0) name = name.substring(0, idx); |
|
65 |
|
|
66 |
Subcorpus sub = corpus.createSubcorpus(name+"_contexts", results.get(0)); |
|
67 |
corpus.addResult(sub); |
|
68 |
return sub; |
|
69 |
} |
|
70 |
return null; |
|
71 |
} |
|
72 |
|
|
73 |
@Override |
|
74 |
public boolean compute(ProgressWatcher monitor) throws IOException, CqiClientException, CqiServerError, InvalidCqpIdException { |
|
75 |
if (!(CQPEngine.getCqiClient() instanceof MemCqiClient)); |
|
76 |
|
|
77 |
MemCqiClient CQI = (MemCqiClient) CQPEngine.getCqiClient(); |
|
78 |
|
|
79 |
BufferedReader keywordFileReader = new BufferedReader(new InputStreamReader( |
|
80 |
new FileInputStream(keywordsFile), "UTF-8")); //$NON-NLS-1$ |
|
81 |
ArrayList<String> keywordLines = new ArrayList<String>(); |
|
82 |
String l = keywordFileReader.readLine(); |
|
83 |
while (l != null) { |
|
84 |
keywordLines.add(l); |
|
85 |
l = keywordFileReader.readLine(); |
|
86 |
} |
|
87 |
keywordFileReader.close(); |
|
88 |
System.out.println("Number of keywords lines: "+keywordLines.size()); |
|
89 |
System.out.println("context left and right size is: "+contextSize); |
|
90 |
monitor.beginTask("Querying keywords...", keywordLines.size()); |
|
91 |
|
|
92 |
int nkeyword = 0; |
|
93 |
for (String line : keywordLines) { |
|
94 |
String[] split = line.split("=", 2); //$NON-NLS-1$ |
|
95 |
if (split.length == 2) { |
|
96 |
Query q = new Query(split[1]+" expand to "+contextSize); |
|
97 |
keywordQueriestoName.put(split[1], split[0]); |
|
98 |
results.add(corpus.query(q, "K"+nkeyword++, false)); |
|
99 |
|
|
100 |
} |
|
101 |
} |
|
102 |
keywordFileReader.close(); |
|
103 |
|
|
104 |
// Loop over QueryResult to Merge them into one subcorpus |
|
105 |
int n = 0; |
|
106 |
monitor.beginTask("Creating subcorpus...", results.size()); |
|
107 |
while(results.size() > 1) { |
|
108 |
|
|
109 |
QueryResult q1 = results.get(0); |
|
110 |
QueryResult q2 = results.get(1); |
|
111 |
//System.out.println("Mergin... "+q1+" "+q2); |
|
112 |
//System.out.println("match sizes "+q1.getNMatch()+" "+q2.getNMatch()); |
|
113 |
|
|
114 |
String merge_name = "Merge"+(n++); |
|
115 |
CQI.query(merge_name+"= union "+q1.getQualifiedCqpId()+" "+q2.getQualifiedCqpId()+";"); |
|
116 |
results.remove(0); |
|
117 |
results.remove(0); |
|
118 |
results.add(new QueryResult(merge_name, merge_name, corpus, null)); |
|
119 |
monitor.worked(1); |
|
120 |
} |
|
121 |
|
|
122 |
System.out.println("Done."); |
|
123 |
return true; |
|
124 |
} |
|
125 |
|
|
126 |
@Override |
|
127 |
public String getName() { |
|
128 |
// TODO Auto-generated method stub |
|
129 |
return null; |
|
130 |
} |
|
131 |
|
|
132 |
@Override |
|
133 |
public String getSimpleName() { |
|
134 |
// TODO Auto-generated method stub |
|
135 |
return null; |
|
136 |
} |
|
137 |
|
|
138 |
@Override |
|
139 |
public String getDetails() { |
|
140 |
// TODO Auto-generated method stub |
|
141 |
return null; |
|
142 |
} |
|
143 |
} |
|
0 | 144 |
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ValuesOfQuery.java (revision 468) | ||
---|---|---|
1 |
package org.txm.functions.cql2lsa; |
|
2 |
|
|
3 |
import java.io.BufferedReader; |
|
4 |
import java.io.BufferedWriter; |
|
5 |
import java.io.File; |
|
6 |
import java.io.FileInputStream; |
|
7 |
import java.io.FileOutputStream; |
|
8 |
import java.io.IOException; |
|
9 |
import java.io.InputStreamReader; |
|
10 |
import java.io.OutputStreamWriter; |
|
11 |
import java.util.Arrays; |
|
12 |
|
|
13 |
import org.txm.core.results.TXMResult; |
|
14 |
import org.txm.functions.ProgressWatcher; |
|
15 |
import org.txm.functions.TXMCommand; |
|
16 |
import org.txm.searchengine.cqp.CQPEngine; |
|
17 |
import org.txm.searchengine.cqp.MemCqiClient; |
|
18 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
19 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
20 |
import org.txm.searchengine.cqp.corpus.Property; |
|
21 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
|
22 |
import org.txm.searchengine.cqp.corpus.StructuralUnitProperty; |
|
23 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
24 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
25 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
26 |
|
|
27 |
/** |
|
28 |
* Simple index with no multiple properties |
|
29 |
* |
|
30 |
* @author mdecorde |
|
31 |
* |
|
32 |
*/ |
|
33 |
public class ValuesOfQuery extends TXMCommand { |
|
34 |
|
|
35 |
Corpus corpus; |
|
36 |
Query query; |
|
37 |
Property prop; |
|
38 |
int fmin; |
|
39 |
|
|
40 |
public ValuesOfQuery(Corpus corpus, Query query, Property prop, int fmin) { |
|
41 |
this.corpus = corpus; |
|
42 |
this.query = query; |
|
43 |
this.prop = prop; |
|
44 |
this.fmin = fmin; |
|
45 |
} |
|
46 |
|
|
47 |
public boolean compute(File outputFile) throws CqiClientException, IOException, CqiServerError { |
|
48 |
|
|
49 |
if (!(CQPEngine.getCqiClient() instanceof MemCqiClient)) return false; |
|
50 |
|
|
51 |
monitor.beginTask("Start querying...", 100); |
|
52 |
|
|
53 |
File file = File.createTempFile("query", ".txt"); |
|
54 |
//ExecTimer t = new ExecTimer(); |
|
55 |
//System.out.println("run query "+query);t.start(); |
|
56 |
QueryResult result = corpus.query(query, "ValuesOf", false); |
|
57 |
//System.out.println("query done"+t.stop());t.start(); |
|
58 |
|
|
59 |
//System.out.println("group query "+query+" and save in "+file.getAbsolutePath());t.start(); |
|
60 |
MemCqiClient cli = (MemCqiClient)CQPEngine.getCqiClient(); |
|
61 |
cli.query("group "+result.getQualifiedCqpId()+" match "+prop.getName()+" > \""+file+"\";"); |
|
62 |
//System.out.println("query done"+t.stop());t.start(); |
|
63 |
|
|
64 |
monitor.worked(50); |
|
65 |
|
|
66 |
if (!file.exists()) return false; |
|
67 |
|
|
68 |
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); |
|
69 |
String line = reader.readLine(); // "#---------------------------------" |
|
70 |
line = reader.readLine(); // "(none) word \t freq" |
|
71 |
|
|
72 |
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8")); |
|
73 |
|
|
74 |
while (line != null) { |
|
75 |
line = line.substring(30); |
|
76 |
// int len = line.length(); |
|
77 |
// |
|
78 |
int i = line.indexOf('\t'); |
|
79 |
// while (line.charAt(i) == ' ') i++; |
|
80 |
// int i_start = i; |
|
81 |
// while (line.charAt(i) != ' ') i++; |
|
82 |
// String w = line.substring(i_start, i); |
|
83 |
// |
|
84 |
// while (line.charAt(i) == ' ') i++; |
|
85 |
// i_start = i; |
|
86 |
// while (line.charAt(i) != ' ' && i < len) i++; |
|
87 |
// String f = line.substring(i_start, i); |
|
88 |
// |
|
89 |
// System.out.println("'"+w+"'\t'"+f+"'"); |
|
90 |
int f = Integer.parseInt(line.substring(i).trim()); |
|
91 |
String s; |
|
92 |
if (f > fmin) { |
|
93 |
//System.out.println(line.substring(0, i).trim() + "\t"+f); |
|
94 |
s = line.substring(0, i).trim(); |
|
95 |
writer.write(s+"=["+prop.getName()+"=\""+s+"\"]\n"); |
|
96 |
} else { |
|
97 |
break; |
|
98 |
} |
|
99 |
line = reader.readLine(); |
|
100 |
monitor.worked(1); |
|
101 |
} |
|
102 |
reader.close(); |
|
103 |
writer.close(); |
|
104 |
System.out.println("Done printing queries in "+outputFile.getAbsolutePath()); |
|
105 |
return true; |
|
106 |
|
|
107 |
} |
|
108 |
|
|
109 |
/** |
|
110 |
* Test purpose function. |
|
111 |
* Does not manage big corpus |
|
112 |
* |
|
113 |
* @throws CqiClientException |
|
114 |
* @throws IOException |
|
115 |
* @throws CqiServerError |
|
116 |
*/ |
|
117 |
public void test() throws CqiClientException, IOException, CqiServerError { |
|
118 |
QueryResult result = corpus.query(query, "ValuesOf", false); |
|
119 |
int[] positions = new int[result.getNMatch()]; |
|
120 |
int i = 0; |
|
121 |
for (Match m : result.getMatches()) { |
|
122 |
positions[i++] = m.getStart(); |
|
123 |
} |
|
124 |
|
|
125 |
String[] values = null; |
|
126 |
if (prop instanceof StructuralUnitProperty) { |
|
127 |
int[] structs = CQPEngine.getCqiClient().cpos2Struc(prop.getQualifiedName(), positions); |
|
128 |
positions = null; |
|
129 |
structs = uniquify(structs); |
|
130 |
values = CQPEngine.getCqiClient().struc2Str(prop.getQualifiedName(), structs); |
|
131 |
structs = null; |
|
132 |
} else { |
|
133 |
int[] indexes = CQPEngine.getCqiClient().cpos2Id(prop.getQualifiedName(), positions); |
|
134 |
positions = null; |
|
135 |
indexes = uniquify(indexes); |
|
136 |
values = CQPEngine.getCqiClient().id2Str(prop.getQualifiedName(), indexes); |
|
137 |
indexes = null; |
|
138 |
} |
|
139 |
|
|
140 |
System.out.println("Values: "); |
|
141 |
for (String v : values) { |
|
142 |
System.out.println(v); |
|
143 |
} |
|
144 |
} |
|
145 |
|
|
146 |
/** |
|
147 |
* |
|
148 |
* @param idx |
|
149 |
* @return the uniq valuesof the idx array |
|
150 |
*/ |
|
151 |
public static int[] uniquify(int[] idx) { |
|
152 |
int[] result = new int[idx.length]; |
|
153 |
int n = 0; |
|
154 |
Arrays.sort(idx); |
|
155 |
|
|
156 |
int previous = -1; |
|
157 |
for (int i : idx) { |
|
158 |
if (previous != i) { |
|
159 |
result[n++] = i; |
|
160 |
previous = i; |
|
161 |
} |
|
162 |
} |
|
163 |
|
|
164 |
int[] final_result = new int[n]; |
|
165 |
System.arraycopy(result, 0, final_result, 0, n); |
|
166 |
|
|
167 |
return final_result; |
|
168 |
} |
|
169 |
|
|
170 |
@Override |
|
171 |
public boolean toTxt(File outfile, String encoding, String colseparator, |
|
172 |
String txtseparator) throws Exception { |
|
173 |
// TODO Auto-generated method stub |
|
174 |
return false; |
|
175 |
} |
|
176 |
|
|
177 |
@Override |
|
178 |
public void clean() { |
|
179 |
// TODO Auto-generated method stub |
|
180 |
|
|
181 |
} |
|
182 |
|
|
183 |
@Override |
|
184 |
public boolean delete() { |
|
185 |
// TODO Auto-generated method stub |
|
186 |
return false; |
|
187 |
} |
|
188 |
|
|
189 |
@Override |
|
190 |
public TXMResult getParent() { |
|
191 |
return corpus; |
|
192 |
} |
|
193 |
|
|
194 |
@Override |
|
195 |
public boolean compute(ProgressWatcher watcher) throws Exception { |
|
196 |
// TODO Auto-generated method stub |
|
197 |
return false; |
|
198 |
} |
|
199 |
|
|
200 |
@Override |
|
201 |
public String getName() { |
|
202 |
return "ValuesOfQuery"; |
|
203 |
} |
|
204 |
|
|
205 |
@Override |
|
206 |
public String getSimpleName() { |
|
207 |
return "ValuesOfQuery"; |
|
208 |
} |
|
209 |
|
|
210 |
@Override |
|
211 |
public String getDetails() { |
|
212 |
return this.corpus.getName()+" "+this.query+" "+this.prop+" "+this.fmin; |
|
213 |
} |
|
214 |
} |
|
0 | 215 |
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ExpII.java (revision 468) | ||
---|---|---|
1 |
package org.txm.functions.cql2lsa; |
|
2 |
|
|
3 |
import java.io.BufferedReader; |
|
4 |
import java.io.BufferedWriter; |
|
5 |
import java.io.File; |
|
6 |
import java.io.FileInputStream; |
|
7 |
import java.io.FileOutputStream; |
|
8 |
import java.io.IOException; |
|
9 |
import java.io.InputStreamReader; |
|
10 |
import java.io.OutputStreamWriter; |
|
11 |
import java.io.PrintWriter; |
|
12 |
import java.util.ArrayList; |
|
13 |
import java.util.Collection; |
|
14 |
import java.util.Collections; |
|
15 |
import java.util.Comparator; |
|
16 |
import java.util.HashMap; |
|
17 |
import java.util.LinkedHashMap; |
|
18 |
import java.util.List; |
|
19 |
import java.util.Map; |
|
20 |
|
|
21 |
import org.eclipse.core.runtime.IAdaptable; |
|
22 |
import org.eclipse.jface.resource.ImageDescriptor; |
|
23 |
import org.eclipse.ui.model.IWorkbenchAdapter; |
|
24 |
import org.txm.core.messages.TXMCoreMessages; |
|
25 |
import org.txm.core.results.TXMResult; |
|
26 |
import org.txm.functions.ProgressWatcher; |
|
27 |
import org.txm.functions.TXMCommand; |
|
28 |
import org.txm.index.core.functions.LineComparator.SortMode; |
|
29 |
import org.txm.index.core.messages.IndexCoreMessages; |
|
30 |
import org.txm.lexicaltable.core.functions.LexicalTable; |
|
31 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl; |
|
32 |
import org.txm.rcp.Messages; |
|
33 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
34 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
|
35 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
36 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
|
37 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
38 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
39 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
40 |
import org.txm.statsengine.r.core.exceptions.RWorkspaceException; |
|
41 |
import org.txm.utils.logger.Log; |
|
42 |
|
|
43 |
public class ExpII extends TXMCommand implements IAdaptable { |
|
44 |
Corpus corpus; |
|
45 |
List<String> texts; |
|
46 |
int[] textBoundaries; |
|
47 |
|
|
48 |
LinkedHashMap<String, QueryIndexLine> lines = new LinkedHashMap<String, QueryIndexLine>(); |
|
49 |
|
|
50 |
/** The writer. */ |
|
51 |
private OutputStreamWriter writer; |
|
52 |
|
|
53 |
/**
 * Stores the corpus and reads its text identifiers and text end limits.
 *
 * @param corpus the corpus this result is computed on
 */
public ExpII(Corpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException {
    this.corpus = corpus;
    this.texts = corpus.getTextsID();
    this.textBoundaries = corpus.getTextEndLimits();
}
|
59 |
|
|
60 |
public int getT() { |
|
61 |
int t = 0; |
|
62 |
for (QueryIndexLine line : lines.values()) { |
|
63 |
t += line.getFrequency(); |
|
64 |
} |
|
65 |
return t; |
|
66 |
} |
|
67 |
|
|
68 |
public int getFmin() { |
|
69 |
int t = 999999999; |
|
70 |
for (QueryIndexLine line : lines.values()) { |
|
71 |
int f = line.getFrequency(); |
|
72 |
if (f < t) t = f; |
|
73 |
} |
|
74 |
return t; |
|
75 |
} |
|
76 |
|
|
77 |
public int getFmax() { |
|
78 |
int t = 0; |
|
79 |
for (QueryIndexLine line : lines.values()) { |
|
80 |
int f = line.getFrequency(); |
|
81 |
if (f > t) t = f; |
|
82 |
} |
|
83 |
return t; |
|
84 |
} |
|
85 |
|
|
86 |
public int getV() { |
|
87 |
return lines.values().size(); |
|
88 |
} |
|
89 |
|
|
90 |
public String getName() { |
|
91 |
return corpus.getName(); |
|
92 |
} |
|
93 |
|
|
94 |
public List<String> getTextNames() { |
|
95 |
return texts; |
|
96 |
} |
|
97 |
|
|
98 |
public TXMResult getParent() { |
|
99 |
return corpus; |
|
100 |
} |
|
101 |
|
|
102 |
public Corpus getCorpus() { |
|
103 |
return corpus; |
|
104 |
} |
|
105 |
|
|
106 |
public Collection<QueryIndexLine> getLines() { |
|
107 |
return lines.values(); |
|
108 |
} |
|
109 |
|
|
110 |
public LinkedHashMap<String, QueryIndexLine> getLinesHash() { |
|
111 |
return lines; |
|
112 |
} |
|
113 |
|
|
114 |
int multi = 1; |
|
115 |
public void sortLines(SortMode mode, boolean revert) { |
|
116 |
|
|
117 |
multi = 1; |
|
118 |
if (revert) multi = -1; |
|
119 |
List<Map.Entry<String, QueryIndexLine>> entries = |
|
120 |
new ArrayList<Map.Entry<String, QueryIndexLine>>(lines.entrySet()); |
|
121 |
|
|
122 |
if (mode == SortMode.FREQUNIT) { |
|
123 |
Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() { |
|
124 |
public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){ |
|
125 |
int ret = multi * (a.getValue().getFrequency() - b.getValue().getFrequency()); |
|
126 |
if (ret == 0) { |
|
127 |
return multi * a.getValue().getName().compareTo(b.getValue().getName()); |
|
128 |
} |
|
129 |
return ret; |
|
130 |
} |
|
131 |
}); |
|
132 |
} else if (mode == SortMode.FREQ) { |
|
133 |
Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() { |
|
134 |
public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){ |
|
135 |
return multi * (a.getValue().getFrequency() - b.getValue().getFrequency()); |
|
136 |
} |
|
137 |
}); |
|
138 |
} else if (mode == SortMode.UNIT) { |
|
139 |
Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() { |
|
140 |
public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){ |
|
141 |
return multi * a.getValue().getName().compareTo(b.getValue().getName()); |
|
142 |
} |
|
143 |
}); |
|
144 |
} else if (mode == SortMode.UNITFREQ) { |
|
145 |
Collections.sort(entries, new Comparator<Map.Entry<String, QueryIndexLine>>() { |
|
146 |
public int compare(Map.Entry<String, QueryIndexLine> a, Map.Entry<String, QueryIndexLine> b){ |
|
147 |
int ret = multi * a.getValue().getName().compareTo(b.getValue().getName()); |
|
148 |
if (ret == 0) { |
|
149 |
return multi * (a.getValue().getFrequency() - b.getValue().getFrequency()); |
|
150 |
} |
|
151 |
return ret; |
|
152 |
} |
|
153 |
}); |
|
154 |
} |
|
155 |
|
|
156 |
LinkedHashMap<String, QueryIndexLine> sortedMap = new LinkedHashMap<String, QueryIndexLine>(); |
|
157 |
for (Map.Entry<String, QueryIndexLine> entry : entries) { |
|
158 |
sortedMap.put(entry.getKey(), entry.getValue()); |
|
159 |
} |
|
160 |
|
|
161 |
lines = sortedMap; |
|
162 |
} |
|
163 |
|
|
164 |
HashMap<String, int[]> keywordStartPositions; |
|
165 |
HashMap<String, int[]> keywordEndPositions; |
|
166 |
|
|
167 |
public void compute(File queriesFile, File keywordFile, File outputDir) throws CqiClientException, IOException, CqiServerError { |
|
168 |
System.out.println("Starting ExpII"); |
|
169 |
|
|
170 |
File outputFile = new File(outputDir, "doc_word_freq_keyword_2.txt"); |
|
171 |
File lexiconFile = new File(outputDir, "lexicon_2.txt"); |
|
172 |
|
|
173 |
PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"))); |
|
174 |
|
|
175 |
keywordStartPositions = new HashMap<String, int[]>(); |
|
176 |
keywordEndPositions = new HashMap<String, int[]>(); |
|
177 |
HashMap<String, String> keywordQueriestoName = new HashMap<String, String>(); |
|
178 |
int idx_lexicon_counter = 0; |
|
179 |
|
|
180 |
// assos query et idx |
|
181 |
// si queries est dans keywords alors, pas la peine de calculer keyword, parce que la query est un keyword |
|
182 |
LinkedHashMap<String, Integer> idxLexicon = new LinkedHashMap<String, Integer>(); |
|
183 |
LinkedHashMap<String, int[]> keywordsMaxCountsLexicon = new LinkedHashMap<String, int[]>(); |
|
184 |
|
|
185 |
BufferedReader keywordFileReader = new BufferedReader(new InputStreamReader( |
|
186 |
new FileInputStream(keywordFile), "UTF-8")); //$NON-NLS-1$ |
|
187 |
ArrayList<String> keywordLines = new ArrayList<String>(); |
|
188 |
String l = keywordFileReader.readLine(); |
|
189 |
while (l != null) { |
|
190 |
keywordLines.add(l); |
|
191 |
l = keywordFileReader.readLine(); |
|
192 |
} |
|
193 |
keywordFileReader.close(); |
|
194 |
System.out.println("Number of keywords lines: "+keywordLines.size()); |
|
195 |
monitor.beginTask("Querying keywords...", keywordLines.size()); |
|
196 |
|
|
197 |
int nkeyword = 0; |
|
198 |
for (String line : keywordLines) { |
|
199 |
String[] split = line.split("=", 2); //$NON-NLS-1$ |
|
200 |
if (split.length == 2) { |
|
201 |
Query q = new Query(split[1]); |
|
202 |
keywordQueriestoName.put(split[1], split[0]); |
|
203 |
QueryResult result = corpus.query(q, "K"+nkeyword++, false); //$NON-NLS-1$ |
|
204 |
keywordStartPositions.put(split[1], result.getStarts()); |
|
205 |
keywordEndPositions.put(split[1], result.getEnds()); |
|
206 |
keywordsMaxCountsLexicon.put(split[1], new int[textBoundaries.length]); |
|
207 |
idxLexicon.put(split[1], idx_lexicon_counter++); |
|
208 |
monitor.worked(1); |
|
209 |
} |
|
210 |
} |
|
211 |
keywordFileReader.close(); |
|
212 |
|
|
213 |
BufferedReader queriesFileReader = new BufferedReader(new InputStreamReader(new FileInputStream(queriesFile), "UTF-8")); //$NON-NLS-1$ |
|
214 |
ArrayList<String> lines = new ArrayList<String>(); |
|
215 |
l = queriesFileReader.readLine(); |
|
216 |
while (l != null) { |
|
217 |
lines.add(l); |
|
218 |
l = queriesFileReader.readLine(); |
|
219 |
} |
|
220 |
queriesFileReader.close(); |
|
221 |
|
|
222 |
System.out.println("Number of lemma lines: "+lines.size()); |
|
223 |
monitor.beginTask("Querying...", lines.size()); |
|
224 |
|
|
225 |
int nquery = 0; |
|
226 |
for (String line : lines) { |
|
227 |
String[] split = line.split("=", 2); //$NON-NLS-1$ |
|
228 |
if (split.length == 2) { |
|
229 |
if (hasLine(split[0])) { |
|
230 |
System.out.println(TXMCoreMessages.QueryIndex_2+line); |
|
231 |
} else { |
|
232 |
QueryResult result = corpus.query(new Query(split[1]), "Q"+nquery++, false); //$NON-NLS-1$ |
|
233 |
int[] starts = result.getStarts(); |
|
234 |
int[] ends = result.getEnds(); |
|
235 |
|
|
236 |
int[] counts = new int[textBoundaries.length]; |
|
237 |
int count = 0; |
|
238 |
int noText = 0; |
|
239 |
int endOfCurrentText = textBoundaries[noText]; |
|
240 |
for (int i : starts) { |
|
241 |
while (i >= endOfCurrentText) { |
|
242 |
if (noText >= textBoundaries.length) break; |
|
243 |
counts[noText] = count; |
|
244 |
noText++; |
|
245 |
if (noText >= textBoundaries.length) break; |
|
246 |
endOfCurrentText = textBoundaries[noText]; |
|
247 |
counts[noText] = 0; |
|
248 |
count = 0; |
|
249 |
} |
|
250 |
count++; |
|
251 |
} |
|
252 |
counts[noText] = count; |
|
253 |
|
|
254 |
if (idxLexicon.containsKey(split[1])) { // the query is already computed !! |
|
255 |
for (noText = 0 ; noText < textBoundaries.length ; noText++) { |
|
256 |
if (counts[noText] > 0) { |
|
257 |
//System.out.println("MCL-"+split[1]+"\t"+noText+"\t"+idxLexicon.get(split[1])+"\t"+counts[noText]+"\t"+idxLexicon.get(split[1])); |
|
258 |
writer.println(noText+"\t"+idxLexicon.get(split[1])+"\t"+counts[noText]+"\t"+idxLexicon.get(split[1])); |
|
259 |
} |
|
260 |
} |
|
261 |
} else { // test if match is covered by keywords |
|
262 |
idxLexicon.put(split[1], idx_lexicon_counter++); // put Lemma query |
|
263 |
|
|
264 |
//int max_sum_f = 0; |
|
265 |
int[] max_f = new int[textBoundaries.length]; |
|
266 |
String[] max_key = new String[textBoundaries.length]; |
|
267 |
|
|
268 |
for (String key : keywordEndPositions.keySet()) { |
|
269 |
int[] keyMaxValues = keywordsMaxCountsLexicon.get(key); |
|
270 |
int[] f = covered(starts, ends, keywordStartPositions.get(key), keywordEndPositions.get(key)); |
|
271 |
for (noText = 0 ; noText < textBoundaries.length ; noText++) { |
|
272 |
if (f[noText] > max_f[noText]) { |
|
273 |
max_f[noText] = f[noText]; |
|
274 |
max_key[noText] = key; |
|
275 |
} |
|
276 |
if (f[noText] > keyMaxValues[noText]) { |
|
277 |
keyMaxValues[noText] = f[noText]; |
|
278 |
} |
|
279 |
} |
|
280 |
} |
|
281 |
// System.out.println("max_f="+Arrays.toString(counts)); |
|
282 |
// System.out.println("max_f="+Arrays.toString(max_f)); |
|
283 |
for (noText = 0 ; noText < textBoundaries.length ; noText++) { |
|
284 |
int F = counts[noText] - max_f[noText]; |
|
285 |
if (F > 0) { |
|
286 |
//System.out.println("L-"+split[1]+"\t"+noText+"\t"+idxLexicon.get(split[1])+"\t"+F+"\t0"); |
|
287 |
writer.println(noText+"\t"+idxLexicon.get(split[1])+"\t"+F+"\t0"); |
|
288 |
} |
|
289 |
} |
|
290 |
} |
|
291 |
} |
|
292 |
} |
|
293 |
monitor.worked(1); |
|
294 |
} |
|
295 |
|
|
296 |
monitor.setMessage("Finalizing doc_word_freq_2.txt file..."); |
|
297 |
for (String key : keywordsMaxCountsLexicon.keySet()) { |
|
298 |
int[] keyMaxValues = keywordsMaxCountsLexicon.get(key); |
|
299 |
for (int noText = 0 ; noText < textBoundaries.length ; noText++) { |
|
300 |
int F = keyMaxValues[noText]; |
|
301 |
if (F > 0) { |
|
302 |
//System.out.println("MC-"+key+"\t"+noText+"\t"+idxLexicon.get(key)+"\t"+F+"\t"+idxLexicon.get(key)); |
|
303 |
writer.println(noText+"\t"+idxLexicon.get(key)+"\t"+F+"\t"+idxLexicon.get(key)); |
|
304 |
} |
|
305 |
} |
|
306 |
} |
|
307 |
queriesFileReader.close(); |
|
308 |
writer.close(); |
|
309 |
|
|
310 |
monitor.setMessage("Writing lexicon file..."); |
|
311 |
writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(lexiconFile), "UTF-8"))); |
|
312 |
for (String query : idxLexicon.keySet()) { |
|
313 |
String name = keywordQueriestoName.get(query); |
|
314 |
if (name != null) { |
|
315 |
writer.println(keywordQueriestoName.get(query)); |
|
316 |
} else { |
|
317 |
writer.println(query); |
|
318 |
} |
|
319 |
|
|
320 |
} |
|
321 |
writer.close(); |
|
322 |
|
|
323 |
System.out.println("Done, result saved in: \n - "+outputFile.getAbsolutePath()+"\n - "+lexiconFile.getAbsolutePath()); |
|
324 |
} |
|
325 |
|
|
326 |
/** |
|
327 |
* Write all the lines on a writer. |
|
328 |
* |
|
329 |
* @param outfile the outfile |
|
330 |
* @param encoding the encoding |
|
331 |
* @param colseparator the colseparator |
|
332 |
* @param txtseparator the txtseparator |
|
333 |
* @return true, if successful |
|
334 |
*/ |
|
335 |
public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) { |
|
336 |
try { |
|
337 |
toTxt(outfile, 0, lines.size(), encoding, colseparator, txtseparator); |
|
338 |
} catch (Exception e) { |
|
339 |
System.err.println(IndexCoreMessages.Index_7 + Log.toString(e)); |
|
340 |
return false; |
|
341 |
} |
|
342 |
return true; |
|
343 |
} |
|
344 |
|
|
345 |
/** |
|
346 |
* Write the lines between from and to on a writer. |
|
347 |
* |
|
348 |
* @param outfile the outfile |
|
349 |
* @param from The first line to be written |
|
350 |
* @param to The last line to be writen |
|
351 |
* @param encoding the encoding |
|
352 |
* @param colseparator the colseparator |
|
353 |
* @param txtseparator the txtseparator |
|
354 |
* @throws CqiClientException the cqi client exception |
|
355 |
* @throws IOException Signals that an I/O exception has occurred. |
|
356 |
*/ |
|
357 |
public void toTxt(File outfile, int from, int to, String encoding, String colseparator, String txtseparator) |
|
358 |
throws CqiClientException, IOException { |
|
359 |
// NK: writer declared as class attribute to perform a clean if the operation is interrupted |
|
360 |
this.writer = new OutputStreamWriter(new FileOutputStream(outfile), |
|
361 |
encoding); |
|
362 |
String header = "Queries"; //$NON-NLS-1$ |
|
363 |
header = txtseparator+ header.substring(0, header.length() - 1) +txtseparator; |
|
364 |
header += colseparator+ txtseparator+ "F" + txtseparator; //$NON-NLS-1$ |
|
365 |
|
|
366 |
for (String t : texts) |
|
367 |
header += colseparator + txtseparator+ t.replace(txtseparator, txtseparator+txtseparator)+txtseparator; |
|
368 |
header += "\n"; //$NON-NLS-1$ |
|
369 |
writer.write(header); |
|
370 |
|
|
371 |
// for(Line ligne: lines) |
|
372 |
for (String name : lines.keySet()) { |
|
373 |
QueryIndexLine ligne = lines.get(name); |
|
374 |
writer.write(txtseparator+ ligne.getName().replace(txtseparator, txtseparator+txtseparator)+ txtseparator + colseparator + ligne.getFrequency()); |
|
375 |
|
|
376 |
for (int j = 0; j < texts.size(); j++) |
|
377 |
writer.write(colseparator + ligne.getFrequency(j)); |
|
378 |
writer.write("\n"); //$NON-NLS-1$ |
|
379 |
} |
|
380 |
writer.flush(); |
|
381 |
writer.close(); |
|
382 |
} |
|
383 |
|
|
384 |
/** |
|
385 |
* Only one query result and uses texts boundaries to count frequencies for each text |
|
386 |
* @param name |
|
387 |
* @param query |
|
388 |
* @return |
|
389 |
* @throws CqiClientException |
|
390 |
*/ |
|
391 |
public QueryIndexLine addLine(String name, Query query) throws CqiClientException { |
|
392 |
if (lines.containsKey(name)) return null; |
|
393 |
QueryResult qresult = corpus.query(query, "tmp", true); //$NON-NLS-1$ |
|
394 |
//System.out.println(query.toString()+" "+qresult.getNMatch()); |
|
395 |
int[] counts = new int[textBoundaries.length]; |
|
396 |
int count = 0; |
|
397 |
int noText = 0; |
|
398 |
int endOfCurrentText = textBoundaries[noText]; |
|
399 |
for (Match m : qresult.getMatches()) { |
|
400 |
while (m.getStart() >= endOfCurrentText) { |
|
401 |
//System.out.println(m.getStart() +">="+endOfCurrentText); |
|
402 |
if (noText >= textBoundaries.length) break; |
|
403 |
//System.out.println("Text: "+texts.get(noText)+" count="+count+" notext="+noText); |
|
404 |
counts[noText] = count; |
|
405 |
noText++; |
|
406 |
if (noText >= textBoundaries.length) break; |
|
407 |
endOfCurrentText = textBoundaries[noText]; |
|
408 |
counts[noText] = 0; |
|
409 |
count = 0; |
|
410 |
} |
|
411 |
count++; |
|
412 |
} |
|
413 |
//System.out.println(noText +"<?"+textBoundaries.length+" count="+count); |
|
414 |
if (noText < textBoundaries.length) // last text |
|
415 |
counts[noText] = count; |
|
416 |
|
|
417 |
qresult.drop(); |
|
418 |
|
|
419 |
QueryIndexLine line = new QueryIndexLine(name, query, null); |
|
420 |
line.setFrequencies(counts); |
|
421 |
lines.put(name, line); |
|
422 |
return line; |
|
423 |
} |
|
424 |
|
|
425 |
public LexicalTable toLexicalTable() { |
|
426 |
|
|
427 |
int npart = texts.size(); |
|
428 |
int[][] freqs = new int[lines.size()][npart]; |
|
429 |
String[] rownames = new String[lines.size()]; |
|
430 |
String[] colnames = new String[npart]; |
|
431 |
|
|
432 |
for (int i = 0 ; i < lines.size() ; i++) { |
|
433 |
QueryIndexLine line = lines.get(i); |
|
434 |
int[] linefreqs = line.getFreqs(); |
|
435 |
rownames[i] = line.getName(); |
|
436 |
for (int j = 0 ; j < npart ; j++) { |
|
437 |
freqs[i][j] = linefreqs[j]; |
|
438 |
} |
|
439 |
} |
|
440 |
int j = 0; |
|
441 |
for (String t : texts) { |
|
442 |
colnames[j] = t; |
|
443 |
j++; |
|
444 |
} |
|
445 |
|
|
446 |
try { |
|
447 |
LexicalTable lt = new LexicalTable(corpus, corpus.getProperty("word"), new LexicalTableImpl(freqs, rownames, colnames)); |
|
448 |
return lt; |
|
449 |
} catch (RWorkspaceException e) { |
|
450 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
451 |
} catch (CqiClientException e) { |
|
452 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
453 |
} |
|
454 |
return null; |
|
455 |
} |
|
456 |
|
|
457 |
public boolean removeLine(String name) { |
|
458 |
if (lines.containsKey(name)) { |
|
459 |
this.acquireSemaphore(); |
|
460 |
lines.remove(name); |
|
461 |
this.releaseSemaphore(); |
|
462 |
return true; |
|
463 |
} else { |
|
464 |
return false; |
|
465 |
} |
|
466 |
} |
|
467 |
|
|
468 |
public boolean hasLine(String name) { |
|
469 |
return lines.containsKey(name); |
|
470 |
} |
|
471 |
|
|
472 |
public ArrayList<QueryIndexLine> getLines(int from, int to) { |
|
473 |
if (lines.size() == 0) return new ArrayList<QueryIndexLine>(); |
|
474 |
|
|
475 |
if (from < 0) from = 0; |
|
476 |
if (to < 0) to = 0; |
|
477 |
if (to > lines.size()) to = lines.size(); |
|
478 |
if (from > to) from = to - 1; |
|
479 |
ArrayList<QueryIndexLine> tmp = new ArrayList<QueryIndexLine>(); |
|
480 |
int i = 0; |
|
481 |
for (QueryIndexLine line : lines.values()) { |
|
482 |
if (i >= from && i < to) { |
|
483 |
tmp.add(line); |
|
484 |
} |
|
485 |
i++; |
|
486 |
} |
|
487 |
|
|
488 |
return tmp; |
|
489 |
} |
|
490 |
|
|
491 |
@Override |
|
492 |
public boolean delete() { |
|
493 |
if (corpus != null) return corpus.removeResult(this); |
|
494 |
return false; |
|
495 |
} |
|
496 |
|
|
497 |
@Override |
|
498 |
public String[] getExportTXTExtensions() { |
|
499 |
return new String[]{"*.csv"}; |
|
500 |
} |
|
501 |
|
|
502 |
@Override |
|
503 |
public void clean() { |
|
504 |
try { |
|
505 |
this.writer.flush(); |
|
506 |
this.writer.close(); |
|
507 |
} catch (IOException e) { |
|
508 |
// TODO Auto-generated catch block |
|
509 |
org.txm.utils.logger.Log.printStackTrace(e); |
|
510 |
} |
|
511 |
} |
|
512 |
|
|
513 |
@Override |
|
514 |
public Object getAdapter(Class adapterType) { |
|
515 |
if (adapterType == IWorkbenchAdapter.class) |
|
516 |
return ExpIIAAdapter; |
|
517 |
return null; |
|
518 |
} |
|
519 |
|
|
520 |
/** The WordCloud adapter. */ |
|
521 |
private static IWorkbenchAdapter ExpIIAAdapter = new IWorkbenchAdapter() { |
|
522 |
|
|
523 |
@Override |
|
524 |
public Object[] getChildren(Object o) { |
|
525 |
return new Object[0]; |
|
526 |
} |
|
527 |
|
|
528 |
@Override |
|
529 |
public ImageDescriptor getImageDescriptor(Object object) { |
|
530 |
return null; |
|
531 |
} |
|
532 |
|
|
533 |
@Override |
|
534 |
public String getLabel(Object o) { |
|
535 |
return ((ExpII) o).getName(); |
|
536 |
} |
|
537 |
|
|
538 |
@Override |
|
539 |
public Object getParent(Object o) { |
|
540 |
return ((ExpII) o).getCorpus(); |
|
541 |
} |
|
542 |
}; |
|
543 |
|
|
544 |
|
|
545 |
|
|
546 |
private int[] covered(int[] starts, int[] ends, int[] keywordStarts, int[] keywordEnds) { |
|
547 |
int[] counts = new int[textBoundaries.length]; |
|
548 |
int noText = 0; |
|
549 |
int endOfCurrentText = textBoundaries[noText]; |
|
550 |
int f = 0; |
|
551 |
// printMatchs(starts, ends, keywordStarts, keywordEnds); |
|
552 |
|
|
553 |
int i_keyword = 0; |
|
554 |
//System.out.println("len i="+starts.length+ " len i_keyword="+keywordStarts.length); |
|
555 |
for (int i = 0 ; i < starts.length ; ) { |
|
556 |
|
|
557 |
//System.out.println("i="+i+ " i_keyword="+i_keyword); |
|
558 |
if (i_keyword >= keywordStarts.length) break; // no more keyword positions |
|
559 |
//System.out.println(""+starts[i]+"->"+ends[i]+" : "+keywordStarts[i_keyword]+"->"+keywordEnds[i_keyword]); |
|
560 |
|
|
561 |
while (starts[i] >= endOfCurrentText) { |
|
562 |
if (noText >= textBoundaries.length) break; |
|
563 |
counts[noText] = f; |
|
564 |
noText++; |
|
565 |
if (noText >= textBoundaries.length) break; |
|
566 |
endOfCurrentText = textBoundaries[noText]; |
|
567 |
counts[noText] = 0; |
|
568 |
f = 0; |
|
569 |
} |
|
570 |
if (starts[i] < keywordStarts[i_keyword]) { |
|
571 |
//System.out.println(" match start is not covered"); |
|
572 |
i++; |
|
573 |
} else if (starts[i] > keywordEnds[i_keyword]) { |
|
574 |
//System.out.println(" next keyword"); |
|
575 |
i_keyword++; // see next keyword match |
|
576 |
} else if (ends[i] <= keywordEnds[i_keyword]) { |
|
577 |
//System.out.println(" next match"); |
|
578 |
//System.out.println(">>>> "+starts[i]+"->"+ends[i]+" : "+keywordStarts[i_keyword]+"->"+keywordEnds[i_keyword]); |
|
579 |
i++; // OK, test next match |
|
580 |
f++; |
|
581 |
} else { |
|
582 |
//System.out.println(" match end is not covered"); |
|
583 |
i++; // OK, test next match |
|
584 |
} |
|
585 |
} |
|
586 |
counts[noText] = f; |
|
587 |
return counts; |
|
588 |
} |
|
589 |
|
|
590 |
private static void printMatchs(int[] starts, int[] ends, int[] keywordStarts, int[] keywordEnds) { |
|
591 |
int min = 999999999; |
|
592 |
int max = 0; |
|
593 |
for (int i : starts) if (i < min) min = i; |
|
594 |
for (int i : keywordStarts) if (i < min) min = i; |
|
595 |
for (int i : ends) if (i > max) max = i; |
|
596 |
for (int i : keywordEnds) if (i > max) max = i; |
|
597 |
for (int i = min ; i <= max ; i++) System.out.print(""+i+"\t"); |
|
598 |
System.out.println(); |
|
599 |
int j = 0; |
|
600 |
int k = 0; |
|
601 |
boolean inout = false; |
|
602 |
for (int i = min ; i <= max ; i++) { |
|
603 |
if (j < starts.length && starts[j] == i) { |
|
604 |
if (k < ends.length && ends[k] == i) { |
|
605 |
System.out.print(""+starts[j++]+"><\t"); |
|
606 |
k++; |
|
607 |
} else { |
|
608 |
System.out.print(""+starts[j++]+">\t"); |
|
609 |
inout = true; |
|
610 |
} |
|
611 |
} else if (k < ends.length && ends[k] == i) { |
|
612 |
System.out.print("<"+ends[k++]+"\t"); |
|
613 |
inout = false; |
|
614 |
} else { |
|
615 |
if (inout) { |
|
616 |
System.out.print("-\t"); |
|
617 |
} else { |
|
618 |
System.out.print("\t"); |
|
619 |
} |
|
620 |
} |
|
621 |
} |
|
622 |
System.out.println(); |
|
623 |
|
|
624 |
j = 0; |
|
625 |
k = 0; |
|
626 |
inout = false; |
|
627 |
for (int i = min ; i <= max ; i++) { |
|
628 |
if (j < keywordStarts.length && keywordStarts[j] == i) { |
|
629 |
if (k < keywordEnds.length && keywordEnds[k] == i) { |
|
630 |
System.out.print(""+keywordStarts[j++]+"><\t"); |
|
631 |
k++; |
|
632 |
} else { |
|
633 |
System.out.print(""+keywordStarts[j++]+">\t"); |
|
634 |
inout = true; |
|
635 |
} |
|
636 |
} else if (k < keywordEnds.length && keywordEnds[k] == i) { |
|
637 |
System.out.print("<"+keywordEnds[k++]+"\t"); |
|
638 |
inout = false; |
|
639 |
} else { |
|
640 |
if (inout) { |
|
641 |
System.out.print("-\t"); |
|
642 |
} else { |
|
643 |
System.out.print("\t"); |
|
644 |
} |
|
645 |
|
|
646 |
} |
|
647 |
} |
|
648 |
System.out.println(); |
|
649 |
} |
|
650 |
|
|
651 |
public static void main(String[] args) { |
|
652 |
// int starts[] = {1, 10, 16, 24}; |
|
653 |
// int ends[] = {2, 12, 16, 30}; |
|
654 |
// int kstarts[] = {5, 9, 16, 22}; |
|
655 |
// int kends[] = {8, 12, 16, 35}; |
|
656 |
// System.out.println(covered(starts, ends, kstarts, kends)); |
|
657 |
|
|
658 |
// int starts[] = {10, 15 ,24, 50}; |
|
659 |
// int ends[] = {12, 16 ,30, 60}; |
|
660 |
// int kstarts[] = {5, 9, 14, 20}; |
|
661 |
// int kends[] = {8, 12, 18, 40}; |
|
662 |
// System.out.println(covered(starts, ends, kstarts, kends)); |
|
663 |
|
|
664 |
// int starts[] = {1, 10, 15 ,24}; |
|
665 |
// int ends[] = {2, 12, 16 ,30}; |
|
666 |
// int kstarts[] = {5, 9, 14, 20}; |
|
667 |
// int kends[] = {8, 12, 18, 40}; |
|
668 |
// System.out.println(covered(starts, ends, kstarts, kends)); |
|
669 |
|
|
670 |
// int starts[] = {4, 10, 16}; |
|
671 |
// int ends[] = {5, 11, 17}; |
|
672 |
// int kstarts[] = {3, 11, 15}; |
|
673 |
// int kends[] = {8, 13, 20}; |
|
674 |
// System.out.println(covered(starts, ends, kstarts, kends)); |
|
675 |
|
|
676 |
int starts[] = {4, 10, 16, 20}; |
|
677 |
int ends[] = {5, 11, 17, 22}; |
|
678 |
int kstarts[] = {3, 11, 15}; |
|
679 |
int kends[] = {8, 13, 20}; |
|
680 |
//System.out.println(covered(starts, ends, kstarts, kends)); |
|
681 |
} |
|
682 |
|
|
683 |
@Override |
|
684 |
public boolean compute(ProgressWatcher watcher) throws Exception { |
|
685 |
System.out.println("ExpII.compute(monitor): not implemented."); |
|
686 |
return false; |
|
687 |
} |
|
688 |
|
|
689 |
@Override |
|
690 |
public String getSimpleName() { |
|
691 |
return "ExpII"; |
|
692 |
} |
|
693 |
|
|
694 |
@Override |
|
695 |
public String getDetails() { |
|
696 |
return texts.toString(); |
|
697 |
} |
|
698 |
} |
|
0 | 699 |
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/package.html (revision 468) | ||
---|---|---|
1 |
<html> |
|
2 |
<body> |
|
3 |
<p>Prototype of Query index.</p> |
|
4 |
</body> |
|
5 |
</html> |
|
0 | 6 |
tmp/org.txm.cql2lsa.rcp/src/org/txm/functions/cql2lsa/ExpI.java (revision 468) | ||
---|---|---|
1 |
package org.txm.functions.cql2lsa; |
|
2 |
|
|
3 |
import java.io.BufferedReader; |
|
4 |
import java.io.File; |
|
5 |
import java.io.FileInputStream; |
|
6 |
import java.io.FileOutputStream; |
|
7 |
import java.io.IOException; |
|
8 |
import java.io.InputStreamReader; |
|
9 |
import java.io.OutputStreamWriter; |
|
10 |
import java.util.ArrayList; |
|
11 |
import java.util.Collection; |
|
12 |
import java.util.Collections; |
|
13 |
import java.util.Comparator; |
|
14 |
import java.util.LinkedHashMap; |
|
15 |
import java.util.List; |
|
16 |
import java.util.Map; |
|
17 |
|
|
18 |
import org.eclipse.core.runtime.IAdaptable; |
|
19 |
import org.eclipse.jface.resource.ImageDescriptor; |
|
20 |
import org.eclipse.ui.model.IWorkbenchAdapter; |
|
21 |
import org.txm.core.messages.TXMCoreMessages; |
|
22 |
import org.txm.functions.ProgressWatcher; |
|
23 |
import org.txm.functions.TXMCommand; |
|
24 |
import org.txm.index.core.functions.LineComparator.SortMode; |
|
25 |
import org.txm.index.core.messages.IndexCoreMessages; |
|
26 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl; |
|
27 |
import org.txm.rcp.IImageKeys; |
|
28 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
29 |
import org.txm.searchengine.cqp.clientExceptions.InvalidCqpIdException; |
|
30 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
31 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
|
32 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
33 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
34 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
35 |
import org.txm.statsengine.r.core.exceptions.RWorkspaceException; |
|
36 |
import org.txm.utils.logger.Log; |
|
37 |
|
|
38 |
public class ExpI extends TXMCommand implements IAdaptable { |
|
39 |
Corpus corpus; |
|
40 |
List<String> texts; |
|
41 |
int[] textBoundaries; |
|
42 |
|
|
43 |
LinkedHashMap<String, QueryIndexLine> lines = new LinkedHashMap<String, QueryIndexLine>(); |
|
44 |
|
|
45 |
/** The writer. */ |
|
46 |
private OutputStreamWriter writer; |
|
47 |
|
|
48 |
public ExpI(Corpus corpus) throws CqiClientException, IOException, CqiServerError, InvalidCqpIdException { |
Formats disponibles : Unified diff