Revision 2153

tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSCorpus.java (revision 2153)
34 34
import ims.tiger.query.internalapi.InternalCorpusQueryManagerLocal;
35 35
import ims.tiger.query.processor.CorpusQueryProcessor;
36 36

  
37
import java.io.File;
38
import java.io.FileNotFoundException;
39
import java.io.UnsupportedEncodingException;
37 40
import java.util.HashMap;
38 41
import java.util.List;
39 42

  
40 43
import org.txm.core.messages.TXMCoreMessages;
44
import org.txm.utils.io.IOUtils;
41 45

  
42 46
/**
43 47
 * The Class TSCorpus.
......
49 53

  
50 54
	/** The managers. */
51 55
	public TSCorpusManager tsmanager;
52
	InternalCorpusQueryManagerLocal2 manager = null;
56
	public InternalCorpusQueryManagerLocal2 manager = null;
53 57
	ExportManager exporter;
54 58
	
55 59
	/** The config. */
......
86 90
		}
87 91
	}
88 92
	
93
	public static boolean createLogPropFile(File directory) {
94
		directory.mkdirs();
95
		File logprop = new File(directory, "tigersearch.logprop");
96
		try {
97
			IOUtils.write(logprop, "# Default log configuration of the TIGERSearch suite\n" + 
98
					"log4j.rootLogger=SEVERE,Logfile\n" + 
99
					"log4j.logger.ims.tiger.gui.tigersearch.TIGERSearch=SEVERE\n" + 
100
					"log4j.appender.Logfile=org.apache.log4j.RollingFileAppender\n" + 
101
					"log4j.appender.Logfile.File=\\${user.home}/tigersearch/tigersearch.log\n" + 
102
					"log4j.appender.Logfile.MaxFileSize=500KB\n" + 
103
					"log4j.appender.Logfile.MaxBackupIndex=1\n" + 
104
					"log4j.appender.Logfile.layout=org.apache.log4j.PatternLayout\n" + 
105
					"log4j.appender.Logfile.layout.ConversionPattern=%5r %-5p [%t] %c{2} - %m%n");
106
		} catch (Exception e) {
107
			// TODO Auto-generated catch block
108
			e.printStackTrace();
109
			return false;
110
		}
111
		return true;
112
	}
113
	
89 114
	public void setDisplayProperties(Header header, List<String> tprops, String ntprop) {
90 115
		config.setDisplayedTFeatures(header, tprops);       
91 116
		config.setDisplayedNTFeature(header, ntprop);
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TIGERSearchEngine.java (revision 2153)
1 1
package org.txm.searchengine.ts;
2 2

  
3 3
import java.io.File;
4
import java.io.RandomAccessFile;
5
import java.nio.MappedByteBuffer;
6
import java.nio.channels.FileChannel;
4 7
import java.util.ArrayList;
5 8
import java.util.LinkedHashSet;
6 9
import java.util.List;
......
69 72
		File tigerDirectory = new File(corpus.getProjectDirectory(), "tiger");
70 73
		File configfile = new File(tigerDirectory, "tigersearch.logprop");
71 74
		TSCorpusManager manager = new TSCorpusManager(tigerDirectory, configfile);
75
		
76
		File offsetsFile = new File(tigerDirectory, corpus.getRootCorpusBuild().getID()+"/offsets.data");
77
		RandomAccessFile offsetsRAFile = null;
78
		FileChannel offsetsFileChannel = null;
79
		MappedByteBuffer offsetsMapped = null;
80
		if (offsetsFile.exists()) {
81
			offsetsRAFile = new RandomAccessFile(offsetsFile, "rw");
82
			offsetsFileChannel = offsetsRAFile.getChannel();
83
			offsetsMapped = offsetsFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, offsetsFileChannel.size());
84
		}
85
		
86
		//out.putInt(positions[i])
72 87

  
88
		File presencesFile = new File(tigerDirectory, corpus.getRootCorpusBuild().getID()+"/presences.data");
89
		RandomAccessFile presencesRAFile = null;
90
		FileChannel presencesFileChannel = null;
91
		MappedByteBuffer presencesMapped = null;
92
		if (presencesFile.exists()) {
93
			presencesRAFile = new RandomAccessFile(presencesFile, "rw");
94
			presencesFileChannel = presencesRAFile.getChannel();
95
			presencesMapped = presencesFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, presencesFileChannel.size());
96
		}
97
		
98

  
73 99
		TSCorpus tcorpus = manager.getCorpus(corpus.getRootCorpusBuild().getID());
74 100
		TSResult result = null;
75 101
		if (corpus == corpus.getRootCorpusBuild() || !(corpus instanceof CQPCorpus)) { // root corpus or something not a CQPCorpus
......
138 164
					
139 165
					if (iPivot != -1 && i != iPivot) continue; // skip match that are not 'pivot'
140 166
					
141
					int left = index.getLeftCorner(sent, match[i]);
142
					int right = index.getRightCorner(sent, match[i]);
167
					int left = sent_start+index.getLeftCorner(sent, match[i]);
168
					if (offsetsMapped != null) { // the TIGER token is not in the CQP corpus
169
						left += offsetsMapped.getInt(left*Integer.BYTES);
170
//						System.out.println("left="+left+" offset="+offsetsMapped.getInt(left*Integer.BYTES));
171
					}
172
					int right = sent_start+index.getRightCorner(sent, match[i]);
173
					if (offsetsMapped != null) { // the TIGER token is not in the CQP corpus
174
						right += offsetsMapped.getInt(right*Integer.BYTES);
175
					}
143 176
					//System.out.println("   M="+match[i]+" ("+left+", "+right+")");
144 177
					
145
					TIGERMatch tigerMatch = new TIGERMatch(sent_start+left, sent_start+right);
178
					TIGERMatch tigerMatch = new TIGERMatch(left, right);
146 179
					
147 180
					//System.out.println("  ajusted="+(tigerMatch));
148 181
					tigerMatchesList.add(tigerMatch);
......
153 186
		//intersect with corpus matches
154 187
		List<? extends Match> result2 = Match.intersect(corpus.getMatches(),  new ArrayList<TIGERMatch>(tigerMatchesList), true);
155 188
		
189
		if (presencesRAFile != null) presencesRAFile.close();
190
		if (presencesFileChannel != null) presencesFileChannel.close();
191
		if (offsetsRAFile != null) offsetsRAFile.close();
192
		if (offsetsFileChannel != null) offsetsFileChannel.close();
193
		
156 194
		return new TIGERSelection(query, result2);
157 195
	}
158 196

  
tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/commands/ImportTIGERAnnotations.java (revision 2153)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$ 
27
//
28
package org.txm.tigersearch.commands;
29

  
30
import java.io.File;
31
import java.io.IOException;
32
import java.io.RandomAccessFile;
33
import java.nio.MappedByteBuffer;
34
import java.nio.channels.FileChannel;
35
import java.util.Arrays;
36

  
37
import org.eclipse.core.commands.AbstractHandler;
38
import org.eclipse.core.commands.ExecutionEvent;
39
import org.eclipse.core.commands.ExecutionException;
40
import org.eclipse.jface.dialogs.MessageDialog;
41
import org.eclipse.jface.viewers.IStructuredSelection;
42
import org.eclipse.swt.SWT;
43
import org.eclipse.swt.widgets.DirectoryDialog;
44
import org.eclipse.ui.handlers.HandlerUtil;
45
import org.txm.searchengine.cqp.AbstractCqiClient;
46
import org.txm.searchengine.cqp.CQPSearchEngine;
47
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
48
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
49
import org.txm.searchengine.cqp.corpus.CQPCorpus;
50
import org.txm.searchengine.cqp.corpus.MainCorpus;
51
import org.txm.searchengine.cqp.serverException.CqiServerError;
52
import org.txm.searchengine.ts.InternalCorpusQueryManagerLocal2;
53
import org.txm.searchengine.ts.TSCorpus;
54
import org.txm.searchengine.ts.TSCorpusManager;
55
import org.txm.utils.DeleteDir;
56
import org.txm.utils.io.FileCopy;
57
import org.txm.utils.logger.Log;
58

  
59
import ims.tiger.corpus.Sentence;
60
import ims.tiger.corpus.T_Node;
61
import ims.tiger.index.reader.Index;
62
import ims.tiger.index.reader.IndexException;
63
import ims.tiger.query.api.QueryIndexException;
64
import ims.tiger.query.processor.CorpusQueryProcessor;
65

  
66

  
67
// TODO: Auto-generated Javadoc
68
/**
69
 * open the TIGERSearch Editor 
70
 * @author mdecorde.
71
 */
72
public class ImportTIGERAnnotations extends AbstractHandler {
73

  
74
	public static final String ID = "org.txm.rcp.commands.function.ComputeTSIndex"; //$NON-NLS-1$
75

  
76
	/* (non-Javadoc)
77
	 * @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent)
78
	 */
79
	@Override
80
	public Object execute(final ExecutionEvent event) throws ExecutionException {
81

  
82
		IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event);
83

  
84
		Object s = selection.getFirstElement();
85
		if (s instanceof CQPCorpus) {
86
			CQPCorpus corpus = (CQPCorpus)s;
87
			MainCorpus mainCorpus = corpus.getMainCorpus();
88
			
89
			File tigerCorpusDirectory = null;
90
			DirectoryDialog dialog = new DirectoryDialog(HandlerUtil.getActiveShell(event), SWT.OPEN);
91
			String path = dialog.open();
92
			if (path == null) {
93
				return null;
94
			} else {
95
				tigerCorpusDirectory = new File(path);
96
			}
97
			
98
			File tigerDirectory = new File(corpus.getProjectDirectory(), "tiger");
99
			File tigerCorpusExistingDirectory = new File(tigerDirectory, tigerCorpusDirectory.getName());
100
			if (tigerCorpusExistingDirectory.exists()) {
101
				boolean doIt = MessageDialog.openConfirm(HandlerUtil.getActiveShell(event), "Replace existing annotations", "TIGERSearch annotations already exists, replace them ?");
102
				if (!doIt) {
103
					Log.warning("Aborting annotations import.");
104
					return null;
105
				}
106
			}
107
			try {
108
				return importAnnotations(mainCorpus, tigerCorpusDirectory);
109
			} catch (Exception e) {
110
				// TODO Auto-generated catch block
111
				e.printStackTrace();
112
				return null;
113
			}
114
		} else {
115
			Log.warning("Selection is not a corpus. Aborting.");
116
			return null;
117
		}
118
	}
119

  
120
	/**
121
	 * 
122
	 * if aTIGER corpus with the same name already exists, it is replaced
123
	 * 
124
	 * @param corpus
125
	 * @param tigerCorpusDirectory
126
	 * @return the number of imported annotations
127
	 * @throws IndexException 
128
	 * @throws QueryIndexException 
129
	 * @throws CqiClientException 
130
	 * @throws CqiServerError 
131
	 * @throws IOException 
132
	 * @throws UnexpectedAnswerException 
133
	 */
134
	public static int importAnnotations(MainCorpus corpus, File tigerCorpusDirectory) throws IndexException, QueryIndexException, UnexpectedAnswerException, IOException, CqiServerError, CqiClientException {
135
		
136
		// TXM corpus files
137
		File tigerDirectory = new File(corpus.getProjectDirectory(), "tiger");
138
		File tigerCorpusExistingDirectory = new File(tigerDirectory, tigerCorpusDirectory.getName());
139
		DeleteDir.deleteDirectory(tigerCorpusExistingDirectory);
140
		tigerCorpusExistingDirectory.mkdirs();
141
		
142
		File configfile = new File(tigerDirectory, "tigersearch.logprop");
143
		if (!configfile.exists()) {
144
			TSCorpus.createLogPropFile(tigerDirectory);
145
		}
146
		
147
		AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
148
		
149
		TSCorpusManager manager = new TSCorpusManager(tigerCorpusDirectory.getParentFile(), configfile);
150

  
151
		TSCorpus tcorpus = manager.getCorpus(tigerCorpusDirectory.getName());
152
		InternalCorpusQueryManagerLocal2 tigermanager = tcorpus.manager;
153
		CorpusQueryProcessor processor = tigermanager.getQueryProcessor();
154
		
155
		Index index = processor.getIndex();
156
		int size = 0;
157
		for (int nr = 0 ; nr < index.getNumberOfGraphs() ; nr++) {
158
			size += index.getNumberOfTNodes(nr);
159
		}
160
		
161
		if (size == 0) {
162
			Log.warning("No word found in the TIGERSearch corpus: "+tigerCorpusDirectory+". Aborting.");
163
			return 0;
164
		}
165
		
166
		Log.info("Importing "+size+" word annotations...");
167
		
168
		// compute start position of sentences
169
		int[] starts = new int[index.getNumberOfGraphs()];
170
		for (int i = 0 ; i < index.getNumberOfGraphs() ; i++) {
171
			starts[i] = 0;
172
			if (i > 0) {
173
				starts[i] += index.getNumberOfTNodes(i-1) + starts[i-1];
174
			}
175
		}
176
		
177
		File offsetsFile = new File(tigerCorpusExistingDirectory, "offsets.data");
178
		RandomAccessFile offsetsRAFile = new RandomAccessFile(offsetsFile, "rw");
179
		FileChannel offsetsFileChannel = offsetsRAFile.getChannel();
180
		MappedByteBuffer offsetsMapped = offsetsFileChannel.map(FileChannel.MapMode.READ_WRITE, 0, size*Integer.BYTES);
181
		//out.putInt(positions[i])
182

  
183
		File presencesFile = new File(tigerCorpusExistingDirectory, "presences.data");
184
		RandomAccessFile presencesRAFile = new RandomAccessFile(presencesFile, "rw");
185
		FileChannel presencesFileChannel = presencesRAFile.getChannel();
186
		MappedByteBuffer presencesMapped = presencesFileChannel.map(FileChannel.MapMode.READ_WRITE, 0, size);
187

  
188
		int numberOfWordsAnntoated = 0;
189
		
190
		// for each sentence
191
		for (int nr = 0 ; nr < index.getNumberOfGraphs() ; nr++) {
192
			int sent_size = index.getNumberOfTNodes(nr);
193
			Sentence sent = tcorpus.manager.getSentence(nr);
194
			
195
			String[] ids = new String[sent_size];
196
			int[] tigerPositions = new int[sent_size];
197
			for (int t = 0 ; t < sent_size ; t++) {
198
				T_Node terminal = (T_Node)sent.getTerminalAt(t);
199
				ids[t] = terminal.getFeature("editionId");
200
				
201
				// try fixing ID
202
				if (ids[t].startsWith("w")) {
203
					if (!ids[t].startsWith("w_")) {
204
						ids[t] = "w_"+ids[t].substring(1);
205
					}
206
				} else {
207
					ids[t] = "w_"+ids[t];
208
				}
209
				tigerPositions[t] = starts[nr]+t;
210
				//System.out.println("T id="+terminal.getID());
211
			}
212
			
213
			int[] ids_idx = CQI.str2Id(corpus.getProperty("id").getQualifiedName(), ids);
214
			Integer[] cqpPositions = new Integer[sent_size];
215
			Integer[] offsets = new Integer[sent_size];
216
			for (int t = 0 ; t < sent_size ; t++) {
217
				if (ids_idx[t] >= 0) {
218
					int[] positions = CQI.id2Cpos(corpus.getProperty("id").getQualifiedName(), ids_idx[t]);
219
					if (positions.length > 1) {
220
						Log.warning("Warning: multiple CQP positions for word_id="+ids[t]);
221
					}
222
					cqpPositions[t] = positions[0]; // take the first position
223
				} else { // word not in the CQP corpus
224
					cqpPositions[t] = null;
225
				}
226
				
227
				if (cqpPositions[t] != null) {
228
					offsets[t] = cqpPositions[t] - tigerPositions[t];
229
				} else {
230
					offsets[t] = 0;
231
				}
232
			}
233
//			System.out.println("ids="+Arrays.toString(ids));
234
//			System.out.println("cqp indexes="+Arrays.toString(ids_idx));
235
//			System.out.println("tiger positions="+Arrays.toString(tigerPositions));
236
//			System.out.println("cqp positions="+Arrays.toString(cqpPositions));
237
//			System.out.println("offsets="+Arrays.toString(offsets));
238
			
239
			// writing data to offset and presences files
240
			for (int t = 0 ; t < sent_size ; t++) {
241
				offsetsMapped.putInt(offsets[t]);
242
				if (offsets[t] != null) {
243
					numberOfWordsAnntoated++;
244
					presencesMapped.put((byte)1);
245
				} else {
246
					presencesMapped.put((byte)0);
247
				}
248
			}
249
		}
250
		
251
		offsetsFileChannel.close();
252
		offsetsRAFile.close();
253
		presencesFileChannel.close();
254
		presencesRAFile.close();
255
		
256
		if (numberOfWordsAnntoated > 0) {
257
			FileCopy.copyFiles(tigerCorpusDirectory, tigerCorpusExistingDirectory);
258
			Log.warning("Done. "+numberOfWordsAnntoated+" words annotated.");
259
		} else {
260
			Log.warning("Warning: no words could be aligned with the CQP corpus. Aborting");
261
		}
262
		
263
		return numberOfWordsAnntoated;
264
	}
265
}
0 266

  
tmp/org.txm.tigersearch.rcp/plugin.xml (revision 2153)
28 28
            id="org.txm.tigersearch.commands.ComputeRecette2"
29 29
            name="%command.name.1">
30 30
      </command>
31
      <command
32
            categoryId="TIGERSearch4TXM.commands.category"
33
            defaultHandler="org.txm.tigersearch.commands.ImportTIGERAnnotations"
34
            id="org.txm.tigersearch.commands.ImportTIGERAnnotations"
35
            name="Import TIGERSearch Annotations...">
36
      </command>
31 37
   </extension>
32 38
   <extension
33 39
         point="org.eclipse.core.expressions.propertyTesters">
......
161 167
            </parameter>
162 168
         </command>
163 169
      </menuContribution>
170
      <menuContribution
171
            locationURI="menu:menu.corpus?after=menu.corpus.build">
172
         <command
173
               commandId="org.txm.tigersearch.commands.ImportTIGERAnnotations"
174
               icon="icons/functions/TSplus.png"
175
               style="push">
176
            <visibleWhen
177
                  checkEnabled="false">
178
               <or>
179
                  <test
180
                        forcePluginActivation="true"
181
                        property="org.txm.rcp.testers.TIGERSearchReady"
182
                        value="TIGERSearchReady">
183
                  </test>
184
                  <reference
185
                        definitionId="OneMainCorpusSelected">
186
                  </reference>
187
               </or>
188
            </visibleWhen>
189
         </command>
190
      </menuContribution>
164 191
   </extension>
165 192
   <extension
166 193
         point="org.eclipse.ui.editors">

Also available in: Unified diff