Révision 2179
| tmp/org.txm.tigersearch.rcp/build.properties (revision 2179) | ||
|---|---|---|
| 8 | 8 |
lib/dom4j-1.6.1.jar,\ |
| 9 | 9 |
lib/log4j-1.2.12.jar,\ |
| 10 | 10 |
lib/TigerSearch.jar,\ |
| 11 |
groovy/org/,\
|
|
| 12 |
OSGI-INF/
|
|
| 11 |
OSGI-INF/,\
|
|
| 12 |
groovy/
|
|
| 13 | 13 |
source..=src/ |
| 14 | 14 |
qualifier=svn |
| tmp/org.txm.tigersearch.rcp/META-INF/MANIFEST.MF (revision 2179) | ||
|---|---|---|
| 1 | 1 |
Manifest-Version: 1.0 |
| 2 | 2 |
Require-Bundle: org.txm.index.rcp;bundle-version="1.0.0";visibility:=reexport, |
| 3 |
org.txm.groovy.core;bundle-version="1.0.0";visibility:=reexport |
|
| 3 |
org.txm.groovy.core;bundle-version="1.0.0";visibility:=reexport, |
|
| 4 |
org.txm.searchengine.core |
|
| 4 | 5 |
Export-Package: ims.jmanual, |
| 5 | 6 |
ims.tiger.corpus, |
| 6 | 7 |
ims.tiger.export, |
| tmp/org.txm.tigersearch.rcp/groovy/org/txm/macro/tiger/exploit/TIGERRatioMacro.groovy (revision 2179) | ||
|---|---|---|
| 1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// STANDARD DECLARATIONS |
|
| 5 |
package org.txm.macro.tiger.exploit |
|
| 6 |
|
|
| 7 |
import groovy.transform.Field |
|
| 8 |
|
|
| 9 |
import org.txm.searchengine.core.SearchEnginesManager |
|
| 10 |
import org.txm.searchengine.cqp.corpus.* |
|
| 11 |
import org.txm.searchengine.ts.TIGERSearchEngine |
|
| 12 |
|
|
| 13 |
// Prints, for each selected corpus, the ratio between the number of matches of
// two TIGERSearch queries (A / B). The counting is delegated to TIGERMatchesMacro.
def scriptName = this.class.getSimpleName()

// Corpora to process: a selected corpus is used as is, a selected partition
// contributes each of its parts.
def selection = []
for (def s : corpusViewSelections) {
	if (s instanceof CQPCorpus) selection << s
	else if (s instanceof Partition) selection.addAll(s.getParts())
}

if (selection.size() == 0) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
} else {
	for (def c : selection) c.compute(false)
}

@Field @Option(name="tiger_query_A", usage="A Full TIGERSearch query", widget="Text", required=true, def="[]")
String tiger_query_A
@Field @Option(name="tiger_query_B", usage="A Full TIGERSearch query", widget="Text", required=true, def="[]")
String tiger_query_B
@Field @Option(name="count_subgraph", usage="Count the sub-matches instead of the matches", widget="Boolean", required=true, def="true")
def count_subgraph
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug
if (!ParametersDialog.open(this)) return

// Turn the debug label into a level; any other value is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

// Runs TIGERMatchesMacro for one query, forwarding the current selection context.
def runMatches = { String query ->
	gse.run(TIGERMatchesMacro, ["args":["tiger_query":query, "count_subgraph":count_subgraph, "debug":debug],
		"selection":selection,
		"selections":selections,
		"corpusViewSelection":corpusViewSelection,
		"corpusViewSelections":corpusViewSelections,
		"monitor":monitor])
}

def results_A = runMatches(tiger_query_A)
def results_B = runMatches(tiger_query_B)

println results_A
println results_B

// TIGERMatchesMacro returns a map (corpus -> count) on success; anything else
// (false, null) means that one of the two runs did not produce counts.
if (!(results_A instanceof Map) || !(results_B instanceof Map)) {
	println "** $scriptName: could not get the match counts of both queries. Aborting."
	return false
}

for (def corpus : selection) {
	def countA = results_A[corpus]
	def countB = results_B[corpus]
	if (countA == null || countB == null) {
		// the corpus was skipped by TIGERMatchesMacro (no count available)
		println "R = ${countA} / ${countB} = NA"
	} else if (countB == 0) {
		// avoid a division by zero
		println "R = ${countA} / ${countB} = NA"
	} else {
		println "R = ${countA} / ${countB} = "+String.format( "%.2f", countA / countB)
	}
}
println "Done."
|
| tmp/org.txm.tigersearch.rcp/groovy/org/txm/macro/tiger/exploit/TIGERMatchesMacro.groovy (revision 2179) | ||
|---|---|---|
| 1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// STANDARD DECLARATIONS |
|
| 5 |
package org.txm.macro.tiger.exploit |
|
| 6 |
|
|
| 7 |
import groovy.transform.Field |
|
| 8 |
|
|
| 9 |
import org.txm.searchengine.core.SearchEnginesManager |
|
| 10 |
import org.txm.searchengine.cqp.corpus.* |
|
| 11 |
import org.txm.searchengine.ts.TIGERSearchEngine |
|
| 12 |
|
|
| 13 |
// Counts the matches of one TIGERSearch query in each selected corpus, prints the
// counts as one tab-separated line and returns them as a map (corpus -> count).
def scriptName = this.class.getSimpleName()

// Corpora to process: a selected corpus is used as is, a selected partition
// contributes each of its parts.
def selection = []
for (def s : corpusViewSelections) {
	if (s instanceof CQPCorpus) selection << s
	else if (s instanceof Partition) selection.addAll(s.getParts())
}

if (selection.size() == 0) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
} else {
	for (def c : selection) c.compute(false)
}

@Field @Option(name="tiger_query", usage="A Full TIGERSearch query", widget="Text", required=true, def="[]")
String tiger_query
@Field @Option(name="count_subgraph", usage="Count the sub-matches instead of the matches", widget="Boolean", required=true, def="true")
def count_subgraph
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug
if (!ParametersDialog.open(this)) return

// Turn the debug label into a level; any other value (e.g. a level already
// passed as a number by a calling macro) is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

TIGERSearchEngine tse = SearchEnginesManager.getTIGERSearchEngine()
println tse

def results = new LinkedHashMap()
// One cell per selected corpus, so that the printed row stays aligned with the
// header even when a corpus is skipped.
def cells = []
for (def corpus : selection) {

	if (!tse.hasIndexes(corpus)) {
		println "Warning: skipping $corpus: no TIGERSearch indexes found."
		cells << "NA"
		continue
	}

	def tcorpus = tse.getTSCorpus(corpus)
	if (tcorpus == null) { // getTSCorpus returns null when the corpus cannot be obtained
		println "Warning: skipping $corpus: the TIGERSearch corpus could not be opened."
		cells << "NA"
		continue
	}

	def sentences_min_max = tse.getSentMinMax(corpus)
	def mresult = tcorpus.manager.processQuery(tiger_query, sentences_min_max[0], sentences_min_max[1], 9999999)
	int size = count_subgraph ? mresult.submatchSize() : mresult.size()

	results[corpus] = size
	cells << size
}

// Header line (corpora) then the frequency line
println "\t"+selection.join("\t")
println "F\t"+cells.join("\t")

println "Done."

return results
|
| tmp/org.txm.tigersearch.rcp/groovy/org/txm/macro/tiger/exploit/TIGERIndexMacro.groovy (revision 2179) | ||
|---|---|---|
| 1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté |
|
| 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
| 3 |
// @author mdecorde |
|
| 4 |
// STANDARD DECLARATIONS |
|
| 5 |
package org.txm.macro.tiger.exploit |
|
| 6 |
|
|
| 7 |
import groovy.transform.Field |
|
| 8 |
|
|
| 9 |
import java.util.List |
|
| 10 |
|
|
| 11 |
import org.txm.searchengine.core.EmptySelection |
|
| 12 |
import org.txm.searchengine.core.SearchEnginesManager |
|
| 13 |
import org.txm.searchengine.cqp.AbstractCqiClient |
|
| 14 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
| 15 |
import org.txm.searchengine.cqp.corpus.* |
|
| 16 |
import org.txm.searchengine.ts.TIGERSearchEngine |
|
| 17 |
import org.txm.searchengine.ts.TSCorpus |
|
| 18 |
import org.txm.searchengine.ts.TSResult |
|
| 19 |
import org.txm.utils.ConsoleProgressBar |
|
| 20 |
import org.txm.utils.logger.Log |
|
| 21 |
|
|
| 22 |
import ims.tiger.corpus.Sentence |
|
| 23 |
import ims.tiger.query.api.*; |
|
| 24 |
|
|
| 25 |
// Builds a frequency index of the values found at the labelled nodes of a
// TIGERSearch query: for each (sub)match, the requested property of each labelled
// node is read and the resulting tuple of values is counted, per corpus and in total.
def scriptName = this.class.getSimpleName()

// Corpora to process: a selected corpus is used as is, a selected partition
// contributes each of its parts.
def selection = []
for (def s : corpusViewSelections) {
	if (s instanceof CQPCorpus) selection << s
	else if (s instanceof Partition) selection.addAll(s.getParts())
}

if (selection.size() == 0) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
} else {
	for (def c : selection) c.compute(false)
}

@Field @Option(name="tiger_query", usage="A Full TIGERSearch query", widget="Text", required=true, def="[]")
String tiger_query
@Field @Option(name="labels", usage="List of TIGER labels separated with ','", widget="String", required=true, def="A,B,C")
def labels
@Field @Option(name="properties", usage="List of properties separated with ','", widget="String", required=true, def="word, word, word")
def properties
@Field @Option(name="count_subgraph", usage="Count every sub-match of a sentence instead of only the first one", widget="Boolean", required=true, def="true")
def count_subgraph
@Field @Option(name="sort_column", usage="Sort the lines by frequency (freq) or by values (labels)", widget="StringArray", metaVar="freq labels", required=true, def="freq")
String sort_column
@Field @Option(name="max_lines", usage="Maximum number of lines to print (no limit if not positive)", widget="Integer", metaVar="freq labels", required=true, def="-1")
int max_lines
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
debug
if (!ParametersDialog.open(this)) return

// Turn the debug label into a level; any other value is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

TIGERSearchEngine tse = SearchEnginesManager.getTIGERSearchEngine()

if (labels.length() == 0) {
	println "Error: no label given. Aborting"
	return false;
} else {
	// trim so that "A, B" gives the labels "A" and "B"
	labels = labels.split(",").collect { it.trim() }
}
if (properties.length() == 0) {
	println "Error: no property given. Aborting"
	return false;
} else {
	// trim so that the default "word, word, word" gives "word" three times (and not " word")
	properties = properties.split(",").collect { it.trim() }
}

if (properties.size() != labels.size()) {
	println "Error: the number of labels and properties does not match. labels=$labels properties=$properties (${labels.size()} != ${properties.size()})"
	return false;
}

// Every label must be declared in the query as "#label:"
def missing = []
for (def l : labels) {
	if (!tiger_query.contains("#${l}:")) {
		missing << l
	}
}
if (missing.size() > 0) {
	println "Error: $missing label(s) not found in query: $tiger_query"
	return false
}

def results = new LinkedHashMap() // corpus -> (List<String> -> Integer)
def total_counts = [:] // List<String> -> Integer

ConsoleProgressBar cpb = new ConsoleProgressBar(selection.size())
for (def corpus : selection) {

	def counts = [:] // List<String> -> Integer

	if (!tse.hasIndexes(corpus)) {
		println "Warning: skipping $corpus: no TIGERSearch indexes found."
		continue;
	}

	def tcorpus = tse.getTSCorpus(corpus);
	def index = tcorpus.getIndex()
	def sentences_min_max = tse.getSentMinMax(corpus);
	MatchResult mresult = tcorpus.manager.processQuery(tiger_query, sentences_min_max[0], sentences_min_max[1], 9999999);

	// Index of each label in the variables of the result
	List<String> variables = java.util.Arrays.asList(mresult.getVariableNames());
	def iVariables = labels.collect { variables.indexOf(it) }
	if (iVariables.contains(-1)) {
		// a negative index would silently read the last position of each sub-match
		println "Warning: skipping $corpus: label(s) not found in the result variables: labels=$labels variables=$variables"
		continue;
	}

	def matches = mresult.matches
	int size = matches.keySet().size();
	if (debug > 0) println "$size sentences matched."
	for (int sent : matches.keySet()) { // the matching sentences
		if (!mresult.isMatchingSentence(sent)) continue;

		int sent_submatch_size = mresult.getSentenceSubmatchSize(sent);
		def sentence = index.getSentence(sent)
		for (int j = 0 ; j < sent_submatch_size ; j++) {
			def positions = mresult.getSentenceSubmatchAt(sent, j)

			// One value per label: the t-th property read at the node of the t-th label
			def strings = []
			int t = 0;
			for (int iV : iVariables) {
				def node = sentence.getNode(positions[iV])
				String value = node.getFeature(properties[t])
				t++
				if (value == null) {
					value = "NA"
				}
				strings << value
			}

			counts[strings] = (counts[strings] ?: 0) + 1
			total_counts[strings] = (total_counts[strings] ?: 0) + 1

			if (!count_subgraph) break; // only the first sub-match of the sentence is counted
		}
	}

	results[corpus] = counts
	cpb.tick()
}
cpb.done()

def keys = []
keys.addAll(total_counts.keySet())
if ("freq".equals(sort_column)) {
	keys = keys.sort() {-total_counts[it]}
} else {
	keys = keys.sort()
}

println properties.join(", ")+"\t"+"F\t"+selection.join("\t")

int nline = 0;
for (def k : keys) {
	print "${k.join('_')} ${total_counts[k]}"
	if (selection.size() > 1) {
		for (def corpus : selection) {
			// a skipped corpus has no entry in results
			def v = results[corpus]?.get(k);
			if (v == null) v = 0;
			print "\t"+v
		}
	}
	println ""
	nline++
	if (max_lines > 0 && nline >= max_lines) {
		println "... (${keys.size() - max_lines})"
		break;
	}
}
|
| tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/rcp/InstallGroovyTIGERFiles.java (revision 2179) | ||
|---|---|---|
| 30 | 30 |
scriptsPackageDirectory2.mkdirs(); |
| 31 | 31 |
BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/scripts/importer", "", scriptsPackageDirectory); |
| 32 | 32 |
BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/scripts/importer", "", scriptsPackageDirectory2); |
| 33 |
|
|
| 33 |
|
|
| 34 |
scriptsPackageDirectory = new File(userDirectory, "org/txm/macro/tiger"); |
|
| 35 |
scriptsPackageDirectory2 = new File(systemDirectory, "org/txm/macro/tiger"); |
|
| 36 |
scriptsPackageDirectory.mkdirs(); |
|
| 37 |
scriptsPackageDirectory2.mkdirs(); |
|
| 38 |
BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/macro/tiger", "", scriptsPackageDirectory); |
|
| 39 |
BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/macro/tiger", "", scriptsPackageDirectory2); |
|
| 40 |
|
|
| 34 | 41 |
return scriptsDirectory.exists(); |
| 35 | 42 |
} |
| 36 | 43 |
|
| tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSCorpus.java (revision 2179) | ||
|---|---|---|
| 30 | 30 |
import ims.tiger.corpus.Header; |
| 31 | 31 |
import ims.tiger.export.ExportManager; |
| 32 | 32 |
import ims.tiger.gui.tigergraphviewer.TIGERGraphViewerConfiguration; |
| 33 |
import ims.tiger.index.reader.Index; |
|
| 34 |
import ims.tiger.index.reader.IndexException; |
|
| 33 | 35 |
import ims.tiger.query.internalapi.InternalCorpusQueryManager; |
| 34 | 36 |
import ims.tiger.query.internalapi.InternalCorpusQueryManagerLocal; |
| 35 | 37 |
import ims.tiger.query.processor.CorpusQueryProcessor; |
| 36 | 38 |
|
| 37 | 39 |
import java.io.File; |
| 38 | 40 |
import java.io.FileNotFoundException; |
| 41 |
import java.io.IOException; |
|
| 42 |
import java.io.RandomAccessFile; |
|
| 39 | 43 |
import java.io.UnsupportedEncodingException; |
| 44 |
import java.nio.MappedByteBuffer; |
|
| 45 |
import java.nio.channels.FileChannel; |
|
| 40 | 46 |
import java.util.HashMap; |
| 41 | 47 |
import java.util.List; |
| 42 | 48 |
|
| ... | ... | |
| 55 | 61 |
public TSCorpusManager tsmanager; |
| 56 | 62 |
public InternalCorpusQueryManagerLocal2 manager = null; |
| 57 | 63 |
ExportManager exporter; |
| 58 |
|
|
| 64 |
|
|
| 59 | 65 |
/** The config. */ |
| 60 | 66 |
TIGERGraphViewerConfiguration config; |
| 61 |
|
|
| 67 |
|
|
| 62 | 68 |
/** The initok. */ |
| 63 | 69 |
boolean initok = false; |
| 64 |
|
|
| 70 |
|
|
| 65 | 71 |
/** The results. */ |
| 66 | 72 |
HashMap<Integer, TSResult> results = new HashMap<Integer, TSResult>(); |
| 67 | 73 |
|
| 74 |
// Additional data for corpus alignment with TXM base corpus (CQP corpus) |
|
| 75 |
RandomAccessFile offsetsRAFile = null; |
|
| 76 |
FileChannel offsetsFileChannel = null; |
|
| 77 |
MappedByteBuffer offsetsMapped = null; // one offset per tiger position |
|
| 78 |
RandomAccessFile presencesRAFile = null; |
|
| 79 |
FileChannel presencesFileChannel = null; |
|
| 80 |
MappedByteBuffer presencesMapped = null; // one 0/1 boolean per tiger position |
|
| 81 |
|
|
| 82 |
private int[] sentence_starts; |
|
| 83 |
|
|
| 84 |
@Override |
|
| 85 |
public void finalize() {
|
|
| 86 |
try {
|
|
| 87 |
close(); |
|
| 88 |
} catch(Exception e) {
|
|
| 89 |
e.printStackTrace(); |
|
| 90 |
} |
|
| 91 |
} |
|
| 92 |
|
|
| 68 | 93 |
/** |
| 69 | 94 |
* Instantiates a new TS corpus. |
| 70 | 95 |
* |
| ... | ... | |
| 72 | 97 |
* @param tsmanager the tsmanager |
| 73 | 98 |
*/ |
| 74 | 99 |
public TSCorpus(String corpusId, TSCorpusManager tsmanager) {
|
| 75 |
|
|
| 100 |
|
|
| 76 | 101 |
String regpath = tsmanager.getRegistryPath(); |
| 77 | 102 |
String confpath = tsmanager.getconfPath(); |
| 78 | 103 |
try {
|
| 79 | 104 |
this.tsmanager = tsmanager; |
| 105 |
|
|
| 80 | 106 |
manager = new InternalCorpusQueryManagerLocal2(regpath); |
| 81 | 107 |
|
| 82 | 108 |
manager.getQueryProcessor(); |
| ... | ... | |
| 84 | 110 |
this.id = corpusId; |
| 85 | 111 |
initok = opencorpus(); |
| 86 | 112 |
exporter = new ExportManager(manager, ""); //$NON-NLS-1$ |
| 113 |
|
|
| 114 |
File offsetsFile = new File(regpath, corpusId+"/offsets.data"); |
|
| 115 |
if (offsetsFile.exists()) {
|
|
| 116 |
offsetsRAFile = new RandomAccessFile(offsetsFile, "rw"); |
|
| 117 |
offsetsFileChannel = offsetsRAFile.getChannel(); |
|
| 118 |
offsetsMapped = offsetsFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, offsetsFileChannel.size()); |
|
| 119 |
} |
|
| 120 |
|
|
| 121 |
//out.putInt(positions[i]) |
|
| 122 |
|
|
| 123 |
File presencesFile = new File(regpath, corpusId+"/presences.data"); |
|
| 124 |
|
|
| 125 |
if (presencesFile.exists()) {
|
|
| 126 |
presencesRAFile = new RandomAccessFile(presencesFile, "rw"); |
|
| 127 |
presencesFileChannel = presencesRAFile.getChannel(); |
|
| 128 |
presencesMapped = presencesFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, presencesFileChannel.size()); |
|
| 129 |
} |
|
| 87 | 130 |
} catch (Exception e) {
|
| 88 | 131 |
// TODO Auto-generated catch block |
| 89 | 132 |
org.txm.utils.logger.Log.printStackTrace(e); |
| 90 | 133 |
} |
| 91 | 134 |
} |
| 92 | 135 |
|
| 136 |
public String getID() {
|
|
| 137 |
return this.id; |
|
| 138 |
} |
|
| 139 |
|
|
| 140 |
public void close() {
|
|
| 141 |
try {
|
|
| 142 |
if (presencesRAFile != null) presencesRAFile.close(); |
|
| 143 |
if (presencesFileChannel != null) presencesFileChannel.close(); |
|
| 144 |
if (offsetsRAFile != null) offsetsRAFile.close(); |
|
| 145 |
if (offsetsFileChannel != null) offsetsFileChannel.close(); |
|
| 146 |
if (sentence_starts != null) sentence_starts = null; |
|
| 147 |
if (results != null) results.clear(); |
|
| 148 |
} catch (IOException e) {
|
|
| 149 |
// TODO Auto-generated catch block |
|
| 150 |
e.printStackTrace(); |
|
| 151 |
} |
|
| 152 |
} |
|
| 153 |
|
|
| 154 |
public int getOffset(int tigerPosition) {
|
|
| 155 |
if (offsetsMapped != null) {
|
|
| 156 |
return offsetsMapped.getInt(tigerPosition*Integer.BYTES); |
|
| 157 |
} else {
|
|
| 158 |
return 0; |
|
| 159 |
} |
|
| 160 |
} |
|
| 161 |
|
|
| 162 |
public int[] getOffsets(int tigerPositions[]) {
|
|
| 163 |
int[] ret = new int[tigerPositions.length]; |
|
| 164 |
if (offsetsMapped != null) {
|
|
| 165 |
for (int i = 0 ; i < tigerPositions.length ; i++) {
|
|
| 166 |
ret[i] = offsetsMapped.getInt(tigerPositions[i]*Integer.BYTES); |
|
| 167 |
} |
|
| 168 |
} |
|
| 169 |
|
|
| 170 |
return ret; |
|
| 171 |
} |
|
| 172 |
|
|
| 173 |
public MappedByteBuffer getOffsetsMapped() {
|
|
| 174 |
return offsetsMapped; |
|
| 175 |
} |
|
| 176 |
|
|
| 177 |
public MappedByteBuffer getPresencesMapped() {
|
|
| 178 |
return presencesMapped; |
|
| 179 |
} |
|
| 180 |
|
|
| 181 |
public int getPresence(int tigerPosition) {
|
|
| 182 |
if (presencesMapped != null) {
|
|
| 183 |
return presencesMapped.getInt(tigerPosition*Integer.BYTES); |
|
| 184 |
} else {
|
|
| 185 |
return 0; |
|
| 186 |
} |
|
| 187 |
} |
|
| 188 |
|
|
| 189 |
public int[] getPresences(int tigerPositions[]) {
|
|
| 190 |
int[] ret = new int[tigerPositions.length]; |
|
| 191 |
if (presencesMapped != null) {
|
|
| 192 |
for (int i = 0 ; i < tigerPositions.length ; i++) {
|
|
| 193 |
ret[i] = presencesMapped.getInt(tigerPositions[i]*Integer.BYTES); |
|
| 194 |
} |
|
| 195 |
} |
|
| 196 |
|
|
| 197 |
return ret; |
|
| 198 |
} |
|
| 199 |
|
|
| 93 | 200 |
public static boolean createLogPropFile(File directory) {
|
| 94 | 201 |
directory.mkdirs(); |
| 95 | 202 |
File logprop = new File(directory, "tigersearch.logprop"); |
| ... | ... | |
| 110 | 217 |
} |
| 111 | 218 |
return true; |
| 112 | 219 |
} |
| 113 |
|
|
| 220 |
|
|
| 114 | 221 |
public void setDisplayProperties(Header header, List<String> tprops, String ntprop) {
|
| 115 | 222 |
config.setDisplayedTFeatures(header, tprops); |
| 116 | 223 |
config.setDisplayedNTFeature(header, ntprop); |
| 117 | 224 |
} |
| 118 |
|
|
| 225 |
|
|
| 119 | 226 |
public InternalCorpusQueryManager getInternalManager() |
| 120 | 227 |
{
|
| 121 | 228 |
return manager; |
| 122 | 229 |
} |
| 123 |
|
|
| 230 |
|
|
| 124 | 231 |
public List<String> getNTFeatures() |
| 125 | 232 |
{
|
| 126 | 233 |
|
| 127 | 234 |
return manager.getHeader().getAllNTFeatureNames(); |
| 128 |
|
|
| 235 |
|
|
| 129 | 236 |
} |
| 130 |
|
|
| 237 |
|
|
| 131 | 238 |
public List<String> getTFeatures() |
| 132 | 239 |
{
|
| 133 | 240 |
return manager.getHeader().getAllTFeatureNames(); |
| 134 | 241 |
} |
| 135 |
|
|
| 242 |
|
|
| 136 | 243 |
/** |
| 137 | 244 |
* Contains a lot of information about the corpus |
| 138 | 245 |
* @return |
| ... | ... | |
| 141 | 248 |
{
|
| 142 | 249 |
return manager.getHeader(); |
| 143 | 250 |
} |
| 144 |
|
|
| 251 |
|
|
| 145 | 252 |
/** |
| 146 | 253 |
* Opencorpus. |
| 147 | 254 |
* |
| ... | ... | |
| 157 | 264 |
catch (Exception e) { System.out.println(TXMCoreMessages.couldntReadCorpusColon+e.getMessage());}
|
| 158 | 265 |
return false; |
| 159 | 266 |
} |
| 160 |
|
|
| 267 |
|
|
| 161 | 268 |
/** |
| 162 | 269 |
* Query. |
| 163 | 270 |
* |
| ... | ... | |
| 169 | 276 |
{
|
| 170 | 277 |
return query(query, -1, -1, -1); |
| 171 | 278 |
} |
| 172 |
|
|
| 279 |
|
|
| 173 | 280 |
/** |
| 174 | 281 |
* Query. |
| 175 | 282 |
* |
| ... | ... | |
| 195 | 302 |
public boolean isOk() {
|
| 196 | 303 |
return initok; |
| 197 | 304 |
} |
| 305 |
|
|
| 306 |
public Index getIndex() {
|
|
| 307 |
InternalCorpusQueryManagerLocal2 tigermanager = this.manager; |
|
| 308 |
CorpusQueryProcessor processor = tigermanager.getQueryProcessor(); |
|
| 309 |
return processor.getIndex(); |
|
| 310 |
} |
|
| 311 |
|
|
| 312 |
public int[] getSentenceStartPositions() throws IndexException {
|
|
| 313 |
if (sentence_starts != null) {
|
|
| 314 |
return sentence_starts; |
|
| 315 |
} |
|
| 316 |
Index index = getIndex(); |
|
| 317 |
|
|
| 318 |
sentence_starts = new int[index.getNumberOfGraphs()]; |
|
| 319 |
for (int i = 0 ; i < index.getNumberOfGraphs() ; i++) {
|
|
| 320 |
sentence_starts[i] = 0; |
|
| 321 |
if (i > 0) {
|
|
| 322 |
sentence_starts[i] += index.getNumberOfTNodes(i-1) + sentence_starts[i-1]; |
|
| 323 |
} |
|
| 324 |
} |
|
| 325 |
|
|
| 326 |
return sentence_starts; |
|
| 327 |
} |
|
| 328 |
|
|
| 329 |
public TSProperty getTProperty(String name) {
|
|
| 330 |
return new TSProperty(this, name, true); |
|
| 331 |
} |
|
| 332 |
|
|
| 333 |
public TSProperty getNTProperty(String name) {
|
|
| 334 |
return new TSProperty(this, name, false); |
|
| 335 |
} |
|
| 198 | 336 |
} |
| tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSResult.java (revision 2179) | ||
|---|---|---|
| 107 | 107 |
this.tsCorpus = tsCorpus; |
| 108 | 108 |
|
| 109 | 109 |
result = tsCorpus.manager.processQuery(query, sent_min, sent_max, match_max); |
| 110 |
|
|
| 110 | 111 |
if (result.size() > 0) {
|
| 111 | 112 |
forest = new ResultForest(result, tsCorpus.manager); |
| 112 | 113 |
header = forest.getHeader(); |
| tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TIGERSearchEngine.java (revision 2179) | ||
|---|---|---|
| 1 | 1 |
package org.txm.searchengine.ts; |
| 2 | 2 |
|
| 3 | 3 |
import java.io.File; |
| 4 |
import java.io.IOException; |
|
| 4 | 5 |
import java.io.RandomAccessFile; |
| 5 | 6 |
import java.nio.MappedByteBuffer; |
| 6 | 7 |
import java.nio.channels.FileChannel; |
| 7 | 8 |
import java.util.ArrayList; |
| 9 |
import java.util.HashMap; |
|
| 8 | 10 |
import java.util.LinkedHashSet; |
| 9 | 11 |
import java.util.List; |
| 10 | 12 |
|
| ... | ... | |
| 20 | 22 |
import org.txm.searchengine.core.Selection; |
| 21 | 23 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
| 22 | 24 |
import org.txm.searchengine.cqp.CQPSearchEngine; |
| 25 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
| 26 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
|
| 23 | 27 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
| 24 | 28 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
| 29 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
|
| 25 | 30 |
import org.txm.utils.DeleteDir; |
| 26 | 31 |
import org.txm.utils.logger.Log; |
| 27 | 32 |
|
| ... | ... | |
| 32 | 37 |
public class TIGERSearchEngine extends SearchEngine {
|
| 33 | 38 |
|
| 34 | 39 |
public static final String NAME = "TIGER"; |
| 35 |
|
|
| 40 |
|
|
| 41 |
HashMap<CorpusBuild, TSCorpus> corpora = null; |
|
| 42 |
|
|
| 43 |
public TSCorpus getTSCorpus(CorpusBuild corpus) {
|
|
| 44 |
CorpusBuild root = corpus.getRootCorpusBuild(); |
|
| 45 |
TSCorpus tscorpus = corpora.get(root); |
|
| 46 |
if (tscorpus != null) {
|
|
| 47 |
return tscorpus; |
|
| 48 |
} |
|
| 49 |
|
|
| 50 |
File tigerDirectory = new File(root.getProjectDirectory(), "tiger"); |
|
| 51 |
File configfile = new File(tigerDirectory, "tigersearch.logprop"); |
|
| 52 |
TSCorpusManager manager = new TSCorpusManager(tigerDirectory, configfile); |
|
| 53 |
tscorpus = manager.getCorpus(root.getID()); |
|
| 54 |
if (tscorpus != null) {
|
|
| 55 |
corpora.put(root, tscorpus); |
|
| 56 |
return tscorpus; |
|
| 57 |
} else {
|
|
| 58 |
return null; |
|
| 59 |
} |
|
| 60 |
} |
|
| 61 |
|
|
| 62 |
public TSCorpus removeTSCorpus(CorpusBuild corpus) {
|
|
| 63 |
CorpusBuild root = corpus.getRootCorpusBuild(); |
|
| 64 |
return corpora.remove(root); |
|
| 65 |
} |
|
| 66 |
|
|
| 36 | 67 |
@Override |
| 37 | 68 |
public boolean isRunning() {
|
| 38 | 69 |
return true; |
| ... | ... | |
| 45 | 76 |
|
| 46 | 77 |
/** |
| 47 | 78 |
* |
| 79 |
* @param cqpCorpus the targeted CQPCorpus |
|
| 80 |
* @return the first and last sentence ids (from 0 to N, N being the number of sentences). WARNING: this is not the list of the sentences of the targeted CQPCorpus, unless the CQPCorpus is contiguous |
|
| 81 |
* |
|
| 82 |
* @throws UnexpectedAnswerException |
|
| 83 |
* @throws IOException |
|
| 84 |
* @throws CqiServerError |
|
| 85 |
* @throws CqiClientException |
|
| 86 |
*/ |
|
| 87 |
public static int[] getSentMinMax(CQPCorpus cqpCorpus) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException {
|
|
| 88 |
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient(); |
|
| 89 |
List<org.txm.searchengine.cqp.corpus.query.Match> matches = cqpCorpus.getMatches(); |
|
| 90 |
if (matches.size() == 0) {
|
|
| 91 |
return new int[]{0,0};
|
|
| 92 |
} |
|
| 93 |
int[] cpos = new int[] {matches.get(0).getStart(), matches.get(matches.size()-1).getEnd()};
|
|
| 94 |
int[] structs = CQI.cpos2Struc(cqpCorpus.getStructuralUnit("s").getProperty("n").getQualifiedName(), cpos);
|
|
| 95 |
if (structs.length == 0) {
|
|
| 96 |
return new int[]{0,0};
|
|
| 97 |
} |
|
| 98 |
int sent_min = structs[0]; |
|
| 99 |
int sent_max = structs[structs.length-1]; |
|
| 100 |
return new int[]{sent_min, sent_max};
|
|
| 101 |
} |
|
| 102 |
|
|
| 103 |
/** |
|
| 104 |
* |
|
| 48 | 105 |
* @return true because TIGER queries are frequently multi lines |
| 49 | 106 |
*/ |
| 50 | 107 |
public boolean hasMultiLineQueries() {
|
| 51 | 108 |
return true; |
| 52 | 109 |
} |
| 53 |
|
|
| 110 |
|
|
| 54 | 111 |
@Override |
| 55 | 112 |
public boolean start(IProgressMonitor monitor) throws Exception {
|
| 113 |
corpora = new HashMap<CorpusBuild, TSCorpus>(); |
|
| 56 | 114 |
return true; |
| 57 | 115 |
} |
| 58 | 116 |
|
| 59 | 117 |
@Override |
| 60 | 118 |
public boolean stop() throws Exception {
|
| 119 |
if (corpora != null) {
|
|
| 120 |
for (TSCorpus corpus : corpora.values()) {
|
|
| 121 |
corpus.close(); // free memory (mmap, etc.) |
|
| 122 |
} |
|
| 123 |
corpora.clear(); |
|
| 124 |
} |
|
| 61 | 125 |
return true; |
| 62 | 126 |
} |
| 63 | 127 |
|
| ... | ... | |
| 68 | 132 |
|
| 69 | 133 |
@Override |
| 70 | 134 |
public Selection query(CorpusBuild corpus, IQuery query, String name, boolean saveQuery) throws Exception {
|
| 71 |
|
|
| 72 |
File tigerDirectory = new File(corpus.getProjectDirectory(), "tiger"); |
|
| 73 |
File configfile = new File(tigerDirectory, "tigersearch.logprop"); |
|
| 74 |
TSCorpusManager manager = new TSCorpusManager(tigerDirectory, configfile); |
|
| 75 |
|
|
| 76 |
File offsetsFile = new File(tigerDirectory, corpus.getRootCorpusBuild().getID()+"/offsets.data"); |
|
| 77 |
RandomAccessFile offsetsRAFile = null; |
|
| 78 |
FileChannel offsetsFileChannel = null; |
|
| 79 |
MappedByteBuffer offsetsMapped = null; |
|
| 80 |
if (offsetsFile.exists()) {
|
|
| 81 |
offsetsRAFile = new RandomAccessFile(offsetsFile, "rw"); |
|
| 82 |
offsetsFileChannel = offsetsRAFile.getChannel(); |
|
| 83 |
offsetsMapped = offsetsFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, offsetsFileChannel.size()); |
|
| 84 |
} |
|
| 85 |
|
|
| 86 |
//out.putInt(positions[i]) |
|
| 87 | 135 |
|
| 88 |
File presencesFile = new File(tigerDirectory, corpus.getRootCorpusBuild().getID()+"/presences.data"); |
|
| 89 |
RandomAccessFile presencesRAFile = null; |
|
| 90 |
FileChannel presencesFileChannel = null; |
|
| 91 |
MappedByteBuffer presencesMapped = null; |
|
| 92 |
if (presencesFile.exists()) {
|
|
| 93 |
presencesRAFile = new RandomAccessFile(presencesFile, "rw"); |
|
| 94 |
presencesFileChannel = presencesRAFile.getChannel(); |
|
| 95 |
presencesMapped = presencesFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, presencesFileChannel.size()); |
|
| 96 |
} |
|
| 97 |
|
|
| 98 |
|
|
| 99 |
TSCorpus tcorpus = manager.getCorpus(corpus.getRootCorpusBuild().getID()); |
|
| 136 |
TSCorpus tcorpus = this.getTSCorpus(corpus); |
|
| 100 | 137 |
TSResult result = null; |
| 101 | 138 |
if (corpus == corpus.getRootCorpusBuild() || !(corpus instanceof CQPCorpus)) { // root corpus or something not a CQPCorpus
|
| 102 | 139 |
result = tcorpus.query(query.getQueryString().replace("\n", " "));
|
| ... | ... | |
| 124 | 161 |
if (size == 0 || subsize == 0) {
|
| 125 | 162 |
return new EmptySelection(query); |
| 126 | 163 |
} |
| 127 |
|
|
| 128 |
InternalCorpusQueryManagerLocal2 tigermanager = tcorpus.manager; |
|
| 129 |
CorpusQueryProcessor processor = tigermanager.getQueryProcessor(); |
|
| 130 |
Index index = processor.getIndex(); |
|
| 131 |
|
|
| 164 |
|
|
| 165 |
Index index = tcorpus.getIndex(); |
|
| 166 |
|
|
| 132 | 167 |
// compute sentence positions |
| 133 | 168 |
//TODO move it to TSCorpus |
| 134 |
int[] starts = new int[index.getNumberOfGraphs()]; |
|
| 135 |
for (int i = 0 ; i < index.getNumberOfGraphs() ; i++) {
|
|
| 136 |
starts[i] = 0; |
|
| 137 |
if (i > 0) {
|
|
| 138 |
starts[i] += index.getNumberOfTNodes(i-1) + starts[i-1]; |
|
| 139 |
} |
|
| 140 |
} |
|
| 141 |
|
|
| 169 |
int[] starts = tcorpus.getSentenceStartPositions(); |
|
| 170 |
|
|
| 142 | 171 |
LinkedHashSet<TIGERMatch> tigerMatchesList = new LinkedHashSet<TIGERMatch>(); |
| 143 |
|
|
| 172 |
|
|
| 144 | 173 |
List<String> variables = java.util.Arrays.asList(mresult.getVariableNames()); |
| 145 | 174 |
//System.out.println("Variables: "+variables+" iPivot="+variables.indexOf("pivot"));
|
| 146 | 175 |
int iPivot = variables.indexOf("pivot");
|
| 176 |
|
|
| 177 |
MappedByteBuffer offsetsMapped = tcorpus.getOffsetsMapped(); |
|
| 178 |
//MappedByteBuffer offsetsMapped = tcorpus.getOffsetsMapped(); |
|
| 147 | 179 |
|
| 148 | 180 |
//System.out.println("submatchSize: "+subsize);
|
| 149 | 181 |
for (int imatch = 0 ; imatch < size; imatch++) { // the matching sentences
|
| 150 | 182 |
int sent = mresult.getSentenceNumberAt(imatch); |
| 151 | 183 |
//Sentence sentence = tcorpus.manager.getSentence(sent); |
| 152 |
|
|
| 184 |
|
|
| 153 | 185 |
//System.out.println(" sent: "+sent);
|
| 154 | 186 |
int sent_submatch = mresult.getSentenceSubmatchSize(sent); |
| 155 |
|
|
| 187 |
|
|
| 156 | 188 |
//System.out.println(" sent submatch size: "+sent_submatch);
|
| 157 | 189 |
for (int isubmatch = 0 ; isubmatch < sent_submatch ; isubmatch++) { // the matches in the sentence
|
| 158 | 190 |
int[] match = mresult.getSentenceSubmatchAt(sent, isubmatch); |
| 159 |
|
|
| 191 |
|
|
| 160 | 192 |
int sent_start = starts[sent]; |
| 161 |
|
|
| 162 |
// System.out.println(" sent="+sent_start+ " matches="+Arrays.toString(match)+" ipivot="+iPivot);
|
|
| 193 |
|
|
| 194 |
// System.out.println(" sent="+sent_start+ " matches="+Arrays.toString(match)+" ipivot="+iPivot);
|
|
| 163 | 195 |
for (int i = 0 ; i < match.length ; i++) {
|
| 164 |
|
|
| 196 |
|
|
| 165 | 197 |
if (iPivot != -1 && i != iPivot) continue; // skip match that are not 'pivot' |
| 166 |
|
|
| 198 |
|
|
| 167 | 199 |
int left = sent_start+index.getLeftCorner(sent, match[i]); |
| 168 | 200 |
if (offsetsMapped != null) { // the TIGER token is not in the CQP corpus
|
| 169 | 201 |
left += offsetsMapped.getInt(left*Integer.BYTES); |
| 170 |
// System.out.println("left="+left+" offset="+offsetsMapped.getInt(left*Integer.BYTES));
|
|
| 202 |
// System.out.println("left="+left+" offset="+offsetsMapped.getInt(left*Integer.BYTES));
|
|
| 171 | 203 |
} |
| 172 | 204 |
int right = sent_start+index.getRightCorner(sent, match[i]); |
| 173 | 205 |
if (offsetsMapped != null) { // the TIGER token is not in the CQP corpus
|
| 174 | 206 |
right += offsetsMapped.getInt(right*Integer.BYTES); |
| 175 | 207 |
} |
| 176 | 208 |
//System.out.println(" M="+match[i]+" ("+left+", "+right+")");
|
| 177 |
|
|
| 209 |
|
|
| 178 | 210 |
TIGERMatch tigerMatch = new TIGERMatch(left, right); |
| 179 |
|
|
| 211 |
|
|
| 180 | 212 |
//System.out.println(" ajusted="+(tigerMatch));
|
| 181 | 213 |
tigerMatchesList.add(tigerMatch); |
| 182 | 214 |
} |
| ... | ... | |
| 185 | 217 |
|
| 186 | 218 |
//intersect with corpus matches |
| 187 | 219 |
List<? extends Match> result2 = Match.intersect(corpus.getMatches(), new ArrayList<TIGERMatch>(tigerMatchesList), true); |
| 188 |
|
|
| 189 |
if (presencesRAFile != null) presencesRAFile.close(); |
|
| 190 |
if (presencesFileChannel != null) presencesFileChannel.close(); |
|
| 191 |
if (offsetsRAFile != null) offsetsRAFile.close(); |
|
| 192 |
if (offsetsFileChannel != null) offsetsFileChannel.close(); |
|
| 193 |
|
|
| 220 |
|
|
| 194 | 221 |
return new TIGERSelection(query, result2); |
| 195 | 222 |
} |
| 196 | 223 |
|
| ... | ... | |
| 201 | 228 |
|
| 202 | 229 |
@Override |
| 203 | 230 |
public boolean hasIndexes(CorpusBuild corpus) {
|
| 231 |
if (corpus == null) return false; |
|
| 232 |
|
|
| 204 | 233 |
//TODO implement a corpora of TIGER corpus |
| 205 |
File buildDirectory = new File(corpus.getProjectDirectory(), "tiger"); |
|
| 234 |
CorpusBuild root = corpus.getRootCorpusBuild(); |
|
| 235 |
File buildDirectory = new File(root.getProjectDirectory(), "tiger"); |
|
| 206 | 236 |
return new File(buildDirectory, "tigersearch.logprop").exists() && |
| 207 |
new File(buildDirectory, corpus.getID()).exists();
|
|
| 237 |
new File(buildDirectory, root.getID()).exists();
|
|
| 208 | 238 |
} |
| 209 | 239 |
|
| 210 | 240 |
@Override |
| 211 | 241 |
public void notify(TXMResult r, String state) {
|
| 212 |
if (r instanceof MainCorpus && "clean".equals(state)) {
|
|
| 242 |
if (r instanceof MainCorpus && "clean".equals(state)) { // the CQP corpus has been deleted by the user
|
|
| 213 | 243 |
MainCorpus c = (MainCorpus)r; |
| 214 | 244 |
File buildDirectory = new File(c.getProjectDirectory(), "tiger/"+c.getID()); |
| 215 | 245 |
if (buildDirectory.exists()) {
|
| 216 | 246 |
DeleteDir.deleteDirectory(buildDirectory); |
| 217 | 247 |
} |
| 218 |
} else if (r instanceof Project && "clean".equals(state)) {
|
|
| 248 |
} else if (r instanceof Project && "clean".equals(state)) { // the Project has been deleted by the user
|
|
| 219 | 249 |
Project c = (Project)r; |
| 220 | 250 |
File buildDirectory = new File(c.getProjectDirectory(), "tiger"); |
| 221 | 251 |
if (buildDirectory.exists()) {
|
| tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSProperty.java (revision 2179) | ||
|---|---|---|
| 1 |
package org.txm.searchengine.ts; |
|
| 2 |
|
|
| 3 |
import org.txm.searchengine.core.SearchEngineProperty; |
|
| 4 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
| 5 |
|
|
| 6 |
import ims.tiger.index.reader.Index; |
|
| 7 |
import ims.tiger.query.processor.CorpusQueryProcessor; |
|
| 8 |
|
|
| 9 |
public class TSProperty implements SearchEngineProperty {
|
|
| 10 |
|
|
| 11 |
TSCorpus tcorpus; |
|
| 12 |
String name; |
|
| 13 |
boolean T; |
|
| 14 |
|
|
| 15 |
public TSProperty(TSCorpus corpus, String name, boolean T) {
|
|
| 16 |
this.tcorpus = corpus; |
|
| 17 |
this.name = name; |
|
| 18 |
this.T = T; |
|
| 19 |
} |
|
| 20 |
|
|
| 21 |
@Override |
|
| 22 |
public String getName() {
|
|
| 23 |
return name; |
|
| 24 |
} |
|
| 25 |
|
|
| 26 |
@Override |
|
| 27 |
public String getFullName() {
|
|
| 28 |
return tcorpus.getHeader().getCorpus_ID()+"_"+name; |
|
| 29 |
} |
|
| 30 |
|
|
| 31 |
public String getValue(Match m) {
|
|
| 32 |
if (m == null) return null; |
|
| 33 |
|
|
| 34 |
InternalCorpusQueryManagerLocal2 tigermanager = tcorpus.manager; |
|
| 35 |
CorpusQueryProcessor processor = tigermanager.getQueryProcessor(); |
|
| 36 |
Index index = processor.getIndex(); |
|
| 37 |
|
|
| 38 |
//TODO not finished |
|
| 39 |
|
|
| 40 |
return null; |
|
| 41 |
} |
|
| 42 |
|
|
| 43 |
} |
|
| 0 | 44 | |
Formats disponibles : Unified diff