Révision 2179

tmp/org.txm.tigersearch.rcp/build.properties (revision 2179)
8 8
               lib/dom4j-1.6.1.jar,\
9 9
               lib/log4j-1.2.12.jar,\
10 10
               lib/TigerSearch.jar,\
11
               groovy/org/,\
12
               OSGI-INF/
11
               OSGI-INF/,\
12
               groovy/
13 13
source..=src/
14 14
qualifier=svn
tmp/org.txm.tigersearch.rcp/META-INF/MANIFEST.MF (revision 2179)
1 1
Manifest-Version: 1.0
2 2
Require-Bundle: org.txm.index.rcp;bundle-version="1.0.0";visibility:=reexport,
3
 org.txm.groovy.core;bundle-version="1.0.0";visibility:=reexport
3
 org.txm.groovy.core;bundle-version="1.0.0";visibility:=reexport,
4
 org.txm.searchengine.core
4 5
Export-Package: ims.jmanual,
5 6
 ims.tiger.corpus,
6 7
 ims.tiger.export,
tmp/org.txm.tigersearch.rcp/groovy/org/txm/macro/tiger/exploit/TIGERRatioMacro.groovy (revision 2179)
1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.tiger.exploit
6

  
7
import groovy.transform.Field
8

  
9
import org.txm.searchengine.core.SearchEnginesManager
10
import org.txm.searchengine.cqp.corpus.*
11
import org.txm.searchengine.ts.TIGERSearchEngine
12

  
13
def scriptName = this.class.getSimpleName()
14

  
15
def selection = []
16
for (def s : corpusViewSelections) {
17
	if (s instanceof CQPCorpus) selection << s
18
	else if (s instanceof Partition) selection.addAll(s.getParts())
19
}
20

  
21
if (selection.size() == 0) {
22
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
23
	return false
24
} else {
25
	for (def c : selection) c.compute(false)
26
}
27

  
28
@Field @Option(name="tiger_query_A", usage="A Full TIGERSearch query", widget="Text", required=true, def="[]")
29
		String tiger_query_A
30
		@Field @Option(name="tiger_query_B", usage="A Full TIGERSearch query", widget="Text", required=true, def="[]")
31
		String tiger_query_B
32
@Field @Option(name="count_subgraph", usage="A Full TIGERSearch query", widget="Boolean", required=true, def="true")
33
		def count_subgraph
34
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
35
		debug
36
if (!ParametersDialog.open(this)) return	
37
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
38

  
39

  
40
def results_A = gse.run(TIGERMatchesMacro, ["args":["tiger_query":tiger_query_A, "count_subgraph":count_subgraph, "debug":debug],
41
	"selection":selection,
42
	"selections":selections,
43
	"corpusViewSelection":corpusViewSelection,
44
	"corpusViewSelections":corpusViewSelections,
45
	"monitor":monitor])
46

  
47
def results_B = gse.run(TIGERMatchesMacro,	["args":["tiger_query":tiger_query_B, "count_subgraph":count_subgraph, "debug":debug],
48
	"selection":selection,
49
	"selections":selections,
50
	"corpusViewSelection":corpusViewSelection,
51
	"corpusViewSelections":corpusViewSelections,
52
	"monitor":monitor])
53

  
54
// Raw per-corpus counts returned by the two TIGERMatchesMacro runs (query A and query B)
println results_A
println results_B

// Print the A/B ratio for every selected corpus.
for (def corpus : selection) {
	def countA = results_A[corpus]
	def countB = results_B[corpus]
	if (countA == null || countB == null || countB == 0) {
		// no count for this corpus (no entry in a result map) or empty denominator:
		// the ratio is undefined, report NA instead of failing on the division
		println "R = ${countA} / ${countB} = NA"
	} else {
		println "R = ${countA} / ${countB} = "+String.format( "%.2f", countA / countB)
	}
}
println "Done."
tmp/org.txm.tigersearch.rcp/groovy/org/txm/macro/tiger/exploit/TIGERMatchesMacro.groovy (revision 2179)
1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.tiger.exploit
6

  
7
import groovy.transform.Field
8

  
9
import org.txm.searchengine.core.SearchEnginesManager
10
import org.txm.searchengine.cqp.corpus.*
11
import org.txm.searchengine.ts.TIGERSearchEngine
12

  
13
def scriptName = this.class.getSimpleName()
14

  
15
def selection = []
16
for (def s : corpusViewSelections) {
17
	if (s instanceof CQPCorpus) selection << s
18
	else if (s instanceof Partition) selection.addAll(s.getParts())
19
}
20

  
21
if (selection.size() == 0) {
22
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
23
	return false
24
} else {
25
	for (def c : selection) c.compute(false)
26
}
27

  
28
@Field @Option(name="tiger_query", usage="A Full TIGERSearch query", widget="Text", required=true, def="[]")
29
		String tiger_query
30
@Field @Option(name="count_subgraph", usage="A Full TIGERSearch query", widget="Boolean", required=true, def="true")
31
		def count_subgraph
32
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
33
		debug
34
if (!ParametersDialog.open(this)) return	
35
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
36

  
37
// Count the matches of 'tiger_query' in each selected corpus and print them as one table row.
TIGERSearchEngine tse = SearchEnginesManager.getTIGERSearchEngine()
if (debug > 0) println tse // engine dump only when debugging is requested

// header row: one column per selected corpus
println "\t"+selection.join("\t")
print "F"

def results = new LinkedHashMap() // corpus -> number of matches (or sub-matches)
def skipped = [] // corpora without TIGERSearch indexes, reported after the row is complete
for (def corpus : selection) {

	if (!tse.hasIndexes(corpus)) {
		// keep the row aligned with the header: emit a placeholder cell and
		// postpone the warning so it does not get printed in the middle of the row
		skipped << corpus
		print "\tNA"
		continue;
	}

	def tcorpus = tse.getTSCorpus(corpus);
	def sentences_min_max = tse.getSentMinMax(corpus);
	def mresult = tcorpus.manager.processQuery(tiger_query, sentences_min_max[0], sentences_min_max[1], 9999999);

	// count every sub-match, or only the top-level matches, depending on the 'count_subgraph' option
	int size = count_subgraph ? mresult.submatchSize() : mresult.size();
	results[corpus] = size
	print "\t"+size
}
println ""

for (def corpus : skipped) {
	println "Warning: skipping $corpus: no TIGERSearch indexes found."
}

println "Done."

// skipped corpora have no entry in the returned map
return results
tmp/org.txm.tigersearch.rcp/groovy/org/txm/macro/tiger/exploit/TIGERIndexMacro.groovy (revision 2179)
1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.tiger.exploit
6

  
7
import groovy.transform.Field
8

  
9
import java.util.List
10

  
11
import org.txm.searchengine.core.EmptySelection
12
import org.txm.searchengine.core.SearchEnginesManager
13
import org.txm.searchengine.cqp.AbstractCqiClient
14
import org.txm.searchengine.cqp.CQPSearchEngine
15
import org.txm.searchengine.cqp.corpus.*
16
import org.txm.searchengine.ts.TIGERSearchEngine
17
import org.txm.searchengine.ts.TSCorpus
18
import org.txm.searchengine.ts.TSResult
19
import org.txm.utils.ConsoleProgressBar
20
import org.txm.utils.logger.Log
21

  
22
import ims.tiger.corpus.Sentence
23
import ims.tiger.query.api.*;
24

  
25
def scriptName = this.class.getSimpleName()
26

  
27
def selection = []
28
for (def s : corpusViewSelections) {
29
	if (s instanceof CQPCorpus) selection << s
30
	else if (s instanceof Partition) selection.addAll(s.getParts())
31
}
32

  
33
if (selection.size() == 0) {
34
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
35
	return false
36
} else {
37
	for (def c : selection) c.compute(false)
38
}
39

  
40
@Field @Option(name="tiger_query", usage="A Full TIGERSearch query", widget="Text", required=true, def="[]")
41
		String tiger_query
42
@Field @Option(name="labels", usage="List of TIGER labels separated with ','", widget="String", required=true, def="A,B,C")
43
		def labels
44
@Field @Option(name="properties", usage="List of properties separated with ','", widget="String", required=true, def="word, word, word")
45
		def properties
46
@Field @Option(name="count_subgraph", usage="A Full TIGERSearch query", widget="Boolean", required=true, def="true")
47
		def count_subgraph
48
@Field @Option(name="sort_column", usage="choose between the len or freq columns", widget="StringArray", metaVar="freq	labels", required=true, def="freq")
49
		String sort_column
50
		@Field @Option(name="max_lines", usage="choose between the len or freq columns", widget="Integer", metaVar="freq	labels", required=true, def="-1")
51
		int max_lines
52
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
53
		debug
54
if (!ParametersDialog.open(this)) return
55
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
56

  
57
TIGERSearchEngine tse = SearchEnginesManager.getTIGERSearchEngine()
58

  
59
// Turn the comma-separated 'labels' and 'properties' parameters into lists of names.
// Each name is trimmed: the default value of 'properties' is "word, word, word", so a plain
// split(",") would produce names starting with a space.
if (labels == null || labels.trim().length() == 0) {
	println "Error: no label given. Aborting"
	return false;
} else {
	labels = labels.split(",").collect { it.trim() }
}
if (properties == null || properties.trim().length() == 0) {
	println "Error: no property given. Aborting"
	return false;
} else {
	properties = properties.split(",").collect { it.trim() }
}

// one property is read per label, so both lists must have the same length
if (properties.size() != labels.size()) {
	println "Error: the number of labels and properties does not match. labels=$labels properties=$properties (${labels.size()} != ${properties.size()})"
	return false;
}
76

  
77
def missing = []
78
for (def l : labels) {
79
	if (!tiger_query.contains("#${l}:")) {
80
		missing << l
81
	}
82
}
83
if (missing.size() > 0) {
84
	println "Error: $missing label(s) not found in query: $tiger_query"
85
	return false
86
}
87

  
88
// Run the TIGER query on every selected corpus and count how often each combination of
// feature values (one value per requested label) occurs.
def results = new LinkedHashMap() // corpus -> (List<String> -> Integer)
def total_counts = [:] // List<String> -> Integer, summed over all processed corpora

ConsoleProgressBar cpb = new ConsoleProgressBar(selection.size())
for (def corpus : selection) {

	if (!tse.hasIndexes(corpus)) {
		println "Warning: skipping $corpus: no TIGERSearch indexes found."
		cpb.tick() // the bar was sized with selection.size(): a skipped corpus is still one step
		continue;
	}

	def counts = [:] // List<String> -> Integer
	def tcorpus = tse.getTSCorpus(corpus);
	def index = tcorpus.getIndex()
	def sentences_min_max = tse.getSentMinMax(corpus);
	MatchResult mresult = tcorpus.manager.processQuery(tiger_query, sentences_min_max[0], sentences_min_max[1], 9999999);

	// position of each requested label among the variables of the match result (-1 when absent)
	List<String> variables = java.util.Arrays.asList(mresult.getVariableNames());
	def iVariables = []
	for (def l : labels) {
		iVariables << variables.indexOf(l)
	}

	def matches = mresult.matches
	int size = matches.keySet().size();
	if (debug > 0) println "$size sentences matched."
	for (int sent : matches.keySet()) { // the matching sentences
		if (!mresult.isMatchingSentence(sent)) continue;

		int sent_submatch_size = mresult.getSentenceSubmatchSize(sent);
		def sentence = index.getSentence(sent)
		for (int j = 0 ; j < sent_submatch_size ; j++) {
			def positions = mresult.getSentenceSubmatchAt(sent, j)
			def strings = []
			int t = 0; // index of the property to read for the current label
			for (int iV : iVariables) {
				String value = null
				if (iV >= 0) {
					// a negative Groovy index would silently read from the end of 'positions',
					// so the node is only looked up when the label was found
					def node = sentence.getNode(positions[iV])
					value = node.getFeature(properties[t])
				}
				t++
				if (value == null) {
					value = "NA"
				}
				strings << value
			}

			counts[strings] = (counts[strings] ?: 0) + 1
			total_counts[strings] = (total_counts[strings] ?: 0) + 1

			// when sub-graphs are not counted, only the first sub-match of a sentence is used
			if (!count_subgraph) break;
		}
	}

	results[corpus] = counts
	cpb.tick()
}
cpb.done()
172

  
173
// Print the index: one line per combination of values, with its total frequency and,
// when several corpora are selected, its frequency in each of them.
def keys = []
keys.addAll(total_counts.keySet())
if ("freq".equals(sort_column)) {
	keys = keys.sort() {-total_counts[it]} // most frequent first
} else {
	// the keys are lists of values: sort them on the text that is actually displayed
	keys = keys.sort() { it.join('_') }
}

println properties.join(", ")+"\t"+"F\t"+selection.join("\t")

int nline = 0;
for (def k : keys) {
	print "${k.join('_')}\t${total_counts[k]}"
	if (selection.size() > 1) {
		for (def corpus : selection) {
			// a corpus may have no entry in 'results' (it was skipped): count it as 0
			def v = results[corpus]?.get(k)
			if (v == null) v = 0;
			print "\t"+v
		}
	}
	println ""
	nline++
	if (max_lines > 0 && nline >= max_lines) {
		// report how many lines were cut, but only when some really were
		int remaining = keys.size() - nline
		if (remaining > 0) println "... (${remaining})"
		break;
	}
}
tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/rcp/InstallGroovyTIGERFiles.java (revision 2179)
30 30
		scriptsPackageDirectory2.mkdirs();
31 31
		BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/scripts/importer", "", scriptsPackageDirectory);
32 32
		BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/scripts/importer", "", scriptsPackageDirectory2);
33
	
33
		
34
		scriptsPackageDirectory = new File(userDirectory, "org/txm/macro/tiger");
35
		scriptsPackageDirectory2 = new File(systemDirectory, "org/txm/macro/tiger");
36
		scriptsPackageDirectory.mkdirs();
37
		scriptsPackageDirectory2.mkdirs();
38
		BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/macro/tiger", "", scriptsPackageDirectory);
39
		BundleUtils.copyFiles(bundle_id, "groovy", "org/txm/macro/tiger", "", scriptsPackageDirectory2);
40
		
34 41
		return scriptsDirectory.exists();
35 42
	}
36 43

  
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSCorpus.java (revision 2179)
30 30
import ims.tiger.corpus.Header;
31 31
import ims.tiger.export.ExportManager;
32 32
import ims.tiger.gui.tigergraphviewer.TIGERGraphViewerConfiguration;
33
import ims.tiger.index.reader.Index;
34
import ims.tiger.index.reader.IndexException;
33 35
import ims.tiger.query.internalapi.InternalCorpusQueryManager;
34 36
import ims.tiger.query.internalapi.InternalCorpusQueryManagerLocal;
35 37
import ims.tiger.query.processor.CorpusQueryProcessor;
36 38

  
37 39
import java.io.File;
38 40
import java.io.FileNotFoundException;
41
import java.io.IOException;
42
import java.io.RandomAccessFile;
39 43
import java.io.UnsupportedEncodingException;
44
import java.nio.MappedByteBuffer;
45
import java.nio.channels.FileChannel;
40 46
import java.util.HashMap;
41 47
import java.util.List;
42 48

  
......
55 61
	public TSCorpusManager tsmanager;
56 62
	public InternalCorpusQueryManagerLocal2 manager = null;
57 63
	ExportManager exporter;
58
	
64

  
59 65
	/** The config. */
60 66
	TIGERGraphViewerConfiguration config;
61
	
67

  
62 68
	/** The initok. */
63 69
	boolean initok = false;
64
	
70

  
65 71
	/** The results. */
66 72
	HashMap<Integer, TSResult> results = new HashMap<Integer, TSResult>();
67 73

  
74
	// Additional data for corpus alignment with TXM base corpus (CQP corpus)
75
	RandomAccessFile offsetsRAFile = null;
76
	FileChannel offsetsFileChannel = null;
77
	MappedByteBuffer offsetsMapped = null; // one offset per tiger position
78
	RandomAccessFile presencesRAFile = null;
79
	FileChannel presencesFileChannel = null;
80
	MappedByteBuffer presencesMapped = null; // one 0/1 boolean per tiger position
81

  
82
	private int[] sentence_starts;
83

  
84
	@Override
85
	public void finalize() {
86
		try {
87
			close();
88
		} catch(Exception e) {
89
			e.printStackTrace();
90
		}
91
	}
92
	
68 93
	/**
69 94
	 * Instantiates a new TS corpus.
70 95
	 *
......
72 97
	 * @param tsmanager the tsmanager
73 98
	 */
74 99
	public TSCorpus(String corpusId, TSCorpusManager tsmanager) {
75
		
100

  
76 101
		String regpath = tsmanager.getRegistryPath();
77 102
		String confpath = tsmanager.getconfPath();
78 103
		try {
79 104
			this.tsmanager = tsmanager;
105
			
80 106
			manager = new InternalCorpusQueryManagerLocal2(regpath);
81 107
			
82 108
			manager.getQueryProcessor();
......
84 110
			this.id = corpusId;		
85 111
			initok = opencorpus();
86 112
			exporter = new ExportManager(manager, ""); //$NON-NLS-1$
113

  
114
			File offsetsFile = new File(regpath, corpusId+"/offsets.data");
115
			if (offsetsFile.exists()) {
116
				offsetsRAFile = new RandomAccessFile(offsetsFile, "rw");
117
				offsetsFileChannel = offsetsRAFile.getChannel();
118
				offsetsMapped = offsetsFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, offsetsFileChannel.size());
119
			}
120

  
121
			//out.putInt(positions[i])
122

  
123
			File presencesFile = new File(regpath, corpusId+"/presences.data");
124

  
125
			if (presencesFile.exists()) {
126
				presencesRAFile = new RandomAccessFile(presencesFile, "rw");
127
				presencesFileChannel = presencesRAFile.getChannel();
128
				presencesMapped = presencesFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, presencesFileChannel.size());
129
			}
87 130
		} catch (Exception e) {
88 131
			// TODO Auto-generated catch block
89 132
			org.txm.utils.logger.Log.printStackTrace(e);
90 133
		}
91 134
	}
92 135
	
136
	public String getID() {
137
		return this.id;
138
	}
139

  
140
	public void close() {
141
		try {
142
			if (presencesRAFile != null) presencesRAFile.close();
143
			if (presencesFileChannel != null) presencesFileChannel.close();
144
			if (offsetsRAFile != null) offsetsRAFile.close();
145
			if (offsetsFileChannel != null) offsetsFileChannel.close();
146
			if (sentence_starts != null) sentence_starts = null;
147
			if (results != null) results.clear();
148
		} catch (IOException e) {
149
			// TODO Auto-generated catch block
150
			e.printStackTrace();
151
		}
152
	}
153

  
154
	public int getOffset(int tigerPosition) {
155
		if (offsetsMapped != null) {
156
			return offsetsMapped.getInt(tigerPosition*Integer.BYTES);
157
		} else {
158
			return 0;
159
		}
160
	}
161

  
162
	public int[] getOffsets(int tigerPositions[]) {
163
		int[] ret = new int[tigerPositions.length];
164
		if (offsetsMapped != null) {
165
			for (int i = 0 ; i < tigerPositions.length ; i++) {
166
				ret[i] =  offsetsMapped.getInt(tigerPositions[i]*Integer.BYTES);
167
			}
168
		}
169

  
170
		return ret;
171
	}
172

  
173
	public MappedByteBuffer getOffsetsMapped() {
174
		return offsetsMapped;
175
	}
176

  
177
	public MappedByteBuffer getPresencesMapped() {
178
		return presencesMapped;
179
	}
180

  
181
	public int getPresence(int tigerPosition) {
182
		if (presencesMapped != null) {
183
			return presencesMapped.getInt(tigerPosition*Integer.BYTES);
184
		} else {
185
			return 0;
186
		}
187
	}
188

  
189
	public int[] getPresences(int tigerPositions[]) {
190
		int[] ret = new int[tigerPositions.length];
191
		if (presencesMapped != null) {
192
			for (int i = 0 ; i < tigerPositions.length ; i++) {
193
				ret[i] =  presencesMapped.getInt(tigerPositions[i]*Integer.BYTES);
194
			}
195
		}
196

  
197
		return ret;
198
	}
199

  
93 200
	public static boolean createLogPropFile(File directory) {
94 201
		directory.mkdirs();
95 202
		File logprop = new File(directory, "tigersearch.logprop");
......
110 217
		}
111 218
		return true;
112 219
	}
113
	
220

  
114 221
	public void setDisplayProperties(Header header, List<String> tprops, String ntprop) {
115 222
		config.setDisplayedTFeatures(header, tprops);       
116 223
		config.setDisplayedNTFeature(header, ntprop);
117 224
	}
118
	
225

  
119 226
	public InternalCorpusQueryManager getInternalManager()
120 227
	{
121 228
		return manager;
122 229
	}
123
	
230

  
124 231
	public List<String> getNTFeatures()
125 232
	{
126 233

  
127 234
		return manager.getHeader().getAllNTFeatureNames();
128
		
235

  
129 236
	}
130
	
237

  
131 238
	public List<String> getTFeatures()
132 239
	{
133 240
		return manager.getHeader().getAllTFeatureNames();
134 241
	}
135
	
242

  
136 243
	/**
137 244
	 * contains a lot of informations about the corpus
138 245
	 * @return
......
141 248
	{
142 249
		return manager.getHeader();
143 250
	}
144
	
251

  
145 252
	/**
146 253
	 * Opencorpus.
147 254
	 *
......
157 264
		catch (Exception e) { System.out.println(TXMCoreMessages.couldntReadCorpusColon+e.getMessage());}
158 265
		return false;
159 266
	}
160
	
267

  
161 268
	/**
162 269
	 * Query.
163 270
	 *
......
169 276
	{
170 277
		return query(query, -1, -1, -1);
171 278
	}
172
	
279

  
173 280
	/**
174 281
	 * Query.
175 282
	 *
......
195 302
	public boolean isOk() {	
196 303
		return initok;
197 304
	}
305

  
306
	public Index getIndex() {
307
		InternalCorpusQueryManagerLocal2 tigermanager = this.manager;
308
		CorpusQueryProcessor processor = tigermanager.getQueryProcessor();
309
		return processor.getIndex();
310
	}
311

  
312
	public int[] getSentenceStartPositions() throws IndexException {
313
		if (sentence_starts != null) {
314
			return sentence_starts;
315
		}
316
		Index index = getIndex();
317
		
318
		sentence_starts = new int[index.getNumberOfGraphs()];
319
		for (int i = 0 ; i < index.getNumberOfGraphs() ; i++) {
320
			sentence_starts[i] = 0;
321
			if (i > 0) {
322
				sentence_starts[i] += index.getNumberOfTNodes(i-1) + sentence_starts[i-1];
323
			}
324
		}
325
		
326
		return sentence_starts;
327
	}
328
	
329
	public TSProperty getTProperty(String name) {
330
		return new TSProperty(this, name, true);
331
	}
332
	
333
	public TSProperty getNTProperty(String name) {
334
		return new TSProperty(this, name, false);
335
	}
198 336
}
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSResult.java (revision 2179)
107 107
		this.tsCorpus = tsCorpus;
108 108

  
109 109
		result = tsCorpus.manager.processQuery(query, sent_min, sent_max, match_max);
110
		
110 111
		if (result.size() > 0) {
111 112
			forest = new ResultForest(result, tsCorpus.manager);
112 113
			header = forest.getHeader();
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TIGERSearchEngine.java (revision 2179)
1 1
package org.txm.searchengine.ts;
2 2

  
3 3
import java.io.File;
4
import java.io.IOException;
4 5
import java.io.RandomAccessFile;
5 6
import java.nio.MappedByteBuffer;
6 7
import java.nio.channels.FileChannel;
7 8
import java.util.ArrayList;
9
import java.util.HashMap;
8 10
import java.util.LinkedHashSet;
9 11
import java.util.List;
10 12

  
......
20 22
import org.txm.searchengine.core.Selection;
21 23
import org.txm.searchengine.cqp.AbstractCqiClient;
22 24
import org.txm.searchengine.cqp.CQPSearchEngine;
25
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
26
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
23 27
import org.txm.searchengine.cqp.corpus.CQPCorpus;
24 28
import org.txm.searchengine.cqp.corpus.MainCorpus;
29
import org.txm.searchengine.cqp.serverException.CqiServerError;
25 30
import org.txm.utils.DeleteDir;
26 31
import org.txm.utils.logger.Log;
27 32

  
......
32 37
public class TIGERSearchEngine extends SearchEngine {
33 38

  
34 39
	public static final String NAME = "TIGER";
35
	
40

  
41
	HashMap<CorpusBuild, TSCorpus> corpora = null;
42

  
43
	public TSCorpus getTSCorpus(CorpusBuild corpus) {
44
		CorpusBuild root = corpus.getRootCorpusBuild();
45
		TSCorpus tscorpus = corpora.get(root);
46
		if (tscorpus != null) {
47
			return tscorpus;
48
		}
49
		
50
		File tigerDirectory = new File(root.getProjectDirectory(), "tiger");
51
		File configfile = new File(tigerDirectory, "tigersearch.logprop");
52
		TSCorpusManager manager = new TSCorpusManager(tigerDirectory, configfile);
53
		tscorpus = manager.getCorpus(root.getID());
54
		if (tscorpus != null) {
55
			corpora.put(root, tscorpus);
56
			return tscorpus;
57
		} else {
58
			return null;
59
		}
60
	}
61

  
62
	public TSCorpus removeTSCorpus(CorpusBuild corpus) {
63
		CorpusBuild root = corpus.getRootCorpusBuild();
64
		return corpora.remove(root);
65
	}
66

  
36 67
	@Override
37 68
	public boolean isRunning() {
38 69
		return true;
......
45 76

  
46 77
	/**
47 78
	 * 
79
	 * @param cqpCorpus the targeted CQPCorpus 
80
	 * @return the first sentence and last sentence id (from 0 to N, N the number of sentences). WARNING: this is not the list of sentences in the targeted CQPCorpus. unless the CQPcorpus is contigues
81
	 * 
82
	 * @throws UnexpectedAnswerException
83
	 * @throws IOException
84
	 * @throws CqiServerError
85
	 * @throws CqiClientException
86
	 */
87
	public static int[] getSentMinMax(CQPCorpus cqpCorpus) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException {
88
		AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
89
		List<org.txm.searchengine.cqp.corpus.query.Match> matches = cqpCorpus.getMatches();
90
		if (matches.size() == 0) {
91
			return new int[]{0,0};
92
		}
93
		int[] cpos = new int[] {matches.get(0).getStart(), matches.get(matches.size()-1).getEnd()};
94
		int[] structs = CQI.cpos2Struc(cqpCorpus.getStructuralUnit("s").getProperty("n").getQualifiedName(), cpos);
95
		if (structs.length == 0) {
96
			return new int[]{0,0};
97
		}
98
		int sent_min = structs[0];
99
		int sent_max = structs[structs.length-1];
100
		return new int[]{sent_min, sent_max};
101
	}
102
	
103
	/**
104
	 * 
48 105
	 * @return true because TIGER queries are frequently multi lines
49 106
	 */
50 107
	public boolean hasMultiLineQueries() {
51 108
		return true;
52 109
	}
53
	
110

  
54 111
	@Override
55 112
	public boolean start(IProgressMonitor monitor) throws Exception {
113
		corpora = new HashMap<CorpusBuild, TSCorpus>();
56 114
		return true;
57 115
	}
58 116

  
59 117
	@Override
60 118
	public boolean stop() throws Exception {
119
		if (corpora != null) {
120
			for (TSCorpus corpus : corpora.values()) {
121
				corpus.close(); // free memory (mmap, etc.)
122
			}
123
			corpora.clear();
124
		}
61 125
		return true;
62 126
	}
63 127

  
......
68 132

  
69 133
	@Override
70 134
	public Selection query(CorpusBuild corpus, IQuery query, String name, boolean saveQuery) throws Exception {
71
		
72
		File tigerDirectory = new File(corpus.getProjectDirectory(), "tiger");
73
		File configfile = new File(tigerDirectory, "tigersearch.logprop");
74
		TSCorpusManager manager = new TSCorpusManager(tigerDirectory, configfile);
75
		
76
		File offsetsFile = new File(tigerDirectory, corpus.getRootCorpusBuild().getID()+"/offsets.data");
77
		RandomAccessFile offsetsRAFile = null;
78
		FileChannel offsetsFileChannel = null;
79
		MappedByteBuffer offsetsMapped = null;
80
		if (offsetsFile.exists()) {
81
			offsetsRAFile = new RandomAccessFile(offsetsFile, "rw");
82
			offsetsFileChannel = offsetsRAFile.getChannel();
83
			offsetsMapped = offsetsFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, offsetsFileChannel.size());
84
		}
85
		
86
		//out.putInt(positions[i])
87 135

  
88
		File presencesFile = new File(tigerDirectory, corpus.getRootCorpusBuild().getID()+"/presences.data");
89
		RandomAccessFile presencesRAFile = null;
90
		FileChannel presencesFileChannel = null;
91
		MappedByteBuffer presencesMapped = null;
92
		if (presencesFile.exists()) {
93
			presencesRAFile = new RandomAccessFile(presencesFile, "rw");
94
			presencesFileChannel = presencesRAFile.getChannel();
95
			presencesMapped = presencesFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, presencesFileChannel.size());
96
		}
97
		
98

  
99
		TSCorpus tcorpus = manager.getCorpus(corpus.getRootCorpusBuild().getID());
136
		TSCorpus tcorpus = this.getTSCorpus(corpus);
100 137
		TSResult result = null;
101 138
		if (corpus == corpus.getRootCorpusBuild() || !(corpus instanceof CQPCorpus)) { // root corpus or something not a CQPCorpus
102 139
			result = tcorpus.query(query.getQueryString().replace("\n", " "));
......
124 161
		if (size == 0 || subsize == 0) {
125 162
			return new EmptySelection(query);
126 163
		}
127
		
128
		InternalCorpusQueryManagerLocal2 tigermanager = tcorpus.manager;
129
		CorpusQueryProcessor processor = tigermanager.getQueryProcessor();
130
		Index index = processor.getIndex();
131
		
164

  
165
		Index index = tcorpus.getIndex();
166

  
132 167
		// compute sentence positions
133 168
		//TODO move it to TSCorpus
134
		int[] starts = new int[index.getNumberOfGraphs()];
135
		for (int i = 0 ; i < index.getNumberOfGraphs() ; i++) {
136
			starts[i] = 0;
137
			if (i > 0) {
138
				starts[i] += index.getNumberOfTNodes(i-1) + starts[i-1];
139
			}
140
		}
141
		
169
		int[] starts = tcorpus.getSentenceStartPositions();
170

  
142 171
		LinkedHashSet<TIGERMatch> tigerMatchesList = new LinkedHashSet<TIGERMatch>();
143
		
172

  
144 173
		List<String> variables = java.util.Arrays.asList(mresult.getVariableNames());
145 174
		//System.out.println("Variables: "+variables+" iPivot="+variables.indexOf("pivot"));
146 175
		int iPivot = variables.indexOf("pivot");
176

  
177
		MappedByteBuffer offsetsMapped = tcorpus.getOffsetsMapped();
178
		//MappedByteBuffer offsetsMapped = tcorpus.getOffsetsMapped();
147 179
		
148 180
		//System.out.println("submatchSize: "+subsize);
149 181
		for (int imatch = 0 ; imatch < size; imatch++) { // the matching sentences
150 182
			int sent = mresult.getSentenceNumberAt(imatch);
151 183
			//Sentence sentence = tcorpus.manager.getSentence(sent);
152
			
184

  
153 185
			//System.out.println(" sent: "+sent);
154 186
			int sent_submatch = mresult.getSentenceSubmatchSize(sent);
155
			
187

  
156 188
			//System.out.println(" sent submatch size: "+sent_submatch);
157 189
			for (int isubmatch = 0 ; isubmatch < sent_submatch ; isubmatch++) { // the matches in the sentence
158 190
				int[] match = mresult.getSentenceSubmatchAt(sent, isubmatch);
159
				
191

  
160 192
				int sent_start = starts[sent];
161
				
162
//				System.out.println("  sent="+sent_start+ " matches="+Arrays.toString(match)+" ipivot="+iPivot);
193

  
194
				//				System.out.println("  sent="+sent_start+ " matches="+Arrays.toString(match)+" ipivot="+iPivot);
163 195
				for (int i = 0 ; i < match.length ; i++) {
164
					
196

  
165 197
					if (iPivot != -1 && i != iPivot) continue; // skip match that are not 'pivot'
166
					
198

  
167 199
					int left = sent_start+index.getLeftCorner(sent, match[i]);
168 200
					if (offsetsMapped != null) { // the TIGER token is not in the CQP corpus
169 201
						left += offsetsMapped.getInt(left*Integer.BYTES);
170
//						System.out.println("left="+left+" offset="+offsetsMapped.getInt(left*Integer.BYTES));
202
						//						System.out.println("left="+left+" offset="+offsetsMapped.getInt(left*Integer.BYTES));
171 203
					}
172 204
					int right = sent_start+index.getRightCorner(sent, match[i]);
173 205
					if (offsetsMapped != null) { // the TIGER token is not in the CQP corpus
174 206
						right += offsetsMapped.getInt(right*Integer.BYTES);
175 207
					}
176 208
					//System.out.println("   M="+match[i]+" ("+left+", "+right+")");
177
					
209

  
178 210
					TIGERMatch tigerMatch = new TIGERMatch(left, right);
179
					
211

  
180 212
					//System.out.println("  ajusted="+(tigerMatch));
181 213
					tigerMatchesList.add(tigerMatch);
182 214
				}
......
185 217

  
186 218
		//intersect with corpus matches
187 219
		List<? extends Match> result2 = Match.intersect(corpus.getMatches(),  new ArrayList<TIGERMatch>(tigerMatchesList), true);
188
		
189
		if (presencesRAFile != null) presencesRAFile.close();
190
		if (presencesFileChannel != null) presencesFileChannel.close();
191
		if (offsetsRAFile != null) offsetsRAFile.close();
192
		if (offsetsFileChannel != null) offsetsFileChannel.close();
193
		
220

  
194 221
		return new TIGERSelection(query, result2);
195 222
	}
196 223

  
......
201 228

  
202 229
	@Override
203 230
	public boolean hasIndexes(CorpusBuild corpus) {
231
		if (corpus == null) return false;
232

  
204 233
		//TODO implement a corpora of TIGER corpus
205
		File buildDirectory = new File(corpus.getProjectDirectory(), "tiger");
234
		CorpusBuild root = corpus.getRootCorpusBuild();
235
		File buildDirectory = new File(root.getProjectDirectory(), "tiger");
206 236
		return	new File(buildDirectory, "tigersearch.logprop").exists() && 
207
				new File(buildDirectory, corpus.getID()).exists();
237
				new File(buildDirectory, root.getID()).exists();
208 238
	}
209 239

  
210 240
	@Override
211 241
	public void notify(TXMResult r, String state) {
212
		if (r instanceof MainCorpus && "clean".equals(state)) {
242
		if (r instanceof MainCorpus && "clean".equals(state)) { // the CQP corpus has been deleted by the user
213 243
			MainCorpus c = (MainCorpus)r;
214 244
			File buildDirectory = new File(c.getProjectDirectory(), "tiger/"+c.getID());
215 245
			if (buildDirectory.exists()) {
216 246
				DeleteDir.deleteDirectory(buildDirectory);
217 247
			}
218
		} else if (r instanceof Project && "clean".equals(state)) {
248
		} else if (r instanceof Project && "clean".equals(state)) {  // the Project has been deleted by the user
219 249
			Project c = (Project)r;
220 250
			File buildDirectory = new File(c.getProjectDirectory(), "tiger");
221 251
			if (buildDirectory.exists()) {
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSProperty.java (revision 2179)
1
package org.txm.searchengine.ts;
2

  
3
import org.txm.searchengine.core.SearchEngineProperty;
4
import org.txm.searchengine.cqp.corpus.query.Match;
5

  
6
import ims.tiger.index.reader.Index;
7
import ims.tiger.query.processor.CorpusQueryProcessor;
8

  
9
public class TSProperty implements SearchEngineProperty {
10

  
11
	TSCorpus tcorpus;
12
	String name;
13
	boolean T;
14
	
15
	public TSProperty(TSCorpus corpus, String name, boolean T) {
16
		this.tcorpus = corpus;
17
		this.name = name;
18
		this.T = T;
19
	}
20
	
21
	@Override
22
	public String getName() {
23
		return name;
24
	}
25
	
26
	@Override
27
	public String getFullName() {
28
		return tcorpus.getHeader().getCorpus_ID()+"_"+name;
29
	}
30
	
31
	public String getValue(Match m) {
32
		if (m == null) return null;
33
		
34
		InternalCorpusQueryManagerLocal2 tigermanager = tcorpus.manager;
35
		CorpusQueryProcessor processor = tigermanager.getQueryProcessor();
36
		Index index = processor.getIndex();
37
		
38
		//TODO not finished
39
		
40
		return null;
41
	}
42

  
43
}
0 44

  

Formats disponibles : Unified diff