Révision 2931
tmp/org.txm.utils/src/org/txm/utils/TableReader.java (revision 2931) | ||
---|---|---|
120 | 120 |
tsvReader.close(); |
121 | 121 |
} else if (excelReader != null) { |
122 | 122 |
excelReader.close(); |
123 |
} else if (odsReader != null) {
|
|
123 |
} else if (odsReader != null) { |
|
124 | 124 |
odsReader.close(); |
125 | 125 |
} |
126 | 126 |
} |
tmp/org.txm.libs.msoffice/src/org/txm/libs/msoffice/ReadExcel.java (revision 2931) | ||
---|---|---|
119 | 119 |
} |
120 | 120 |
|
121 | 121 |
/** |
122 |
* extract lines and columns following rules.
|
|
122 |
* search and replace lines utils
|
|
123 | 123 |
* |
124 |
* don't forget to save the ReadeExcel object |
|
125 |
* |
|
126 | 124 |
* @param excel2 |
127 | 125 |
* @param lineRules |
128 | 126 |
* @param columnsSelection |
... | ... | |
141 | 139 |
} |
142 | 140 |
|
143 | 141 |
/** |
142 |
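* copy each column named by a key of columnsToCopy into a new column appended after the last header column; the header cell of the new column is set to the mapped value. |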
|
143 |
* |
|
144 |
* don't forget to save the ReadExcel object |
|
145 |
* |
|
146 |
* @param excel2 |
|
147 |
* @param lineRules |
|
148 |
* @param columnsSelection |
|
149 |
* @return |
|
150 |
* @throws EncryptedDocumentException |
|
151 |
* @throws InvalidFormatException |
|
152 |
* @throws IOException |
|
153 |
*/ |
|
154 |
public boolean copyColumns(HashMap<String, String> columnsToCopy) throws EncryptedDocumentException, InvalidFormatException, IOException { |
|
155 |
Row headers = ws.getRow(0); |
|
156 |
if (headers == null) return false; |
|
157 |
|
|
158 |
int colMax = headers.getLastCellNum(); |
|
159 |
int nRows = ws.getPhysicalNumberOfRows(); |
|
160 |
|
|
161 |
HashMap<String, Integer> searchColumns = new HashMap<>(); |
|
162 |
for (int colIndex = 0; colIndex < colMax; colIndex++) { |
|
163 |
Cell cell = headers.getCell(colIndex); |
|
164 |
if (cell != null) { |
|
165 |
String value = cellToString(cell).trim(); |
|
166 |
if (columnsToCopy.containsKey(value)) { |
|
167 |
searchColumns.put(value, colIndex); |
|
168 |
} |
|
169 |
} |
|
170 |
} |
|
171 |
|
|
172 |
if (searchColumns.keySet().size() != columnsToCopy.keySet().size()) { |
|
173 |
System.out.println("Error: not all columns found=" + searchColumns.keySet() + " of " + columnsToCopy.keySet()); |
|
174 |
ArrayList<String> notFound = new ArrayList<>(columnsToCopy.keySet()); |
|
175 |
notFound.removeAll(searchColumns.keySet()); |
|
176 |
System.out.println("NOT FOUND: " + notFound); |
|
177 |
return false; |
|
178 |
} |
|
179 |
|
|
180 |
System.out.println("N Rows to update: " + nRows); |
|
181 |
int nRowWritten = 0; |
|
182 |
|
|
183 |
for (int rowIndex = 0; rowIndex < nRows; rowIndex++) { // update all rows, starting from the second row (first row is the header) |
|
184 |
Row row = ws.getRow(rowIndex); |
|
185 |
if (row == null) continue; |
|
186 |
|
|
187 |
int icol = 0; |
|
188 |
for (String col : columnsToCopy.keySet()) { |
|
189 |
int colIndex = searchColumns.get(col); |
|
190 |
|
|
191 |
Cell cell = row.getCell(colIndex); |
|
192 |
if (cell != null) { |
|
193 |
Cell newCell = row.createCell(colMax + icol); |
|
194 |
if (rowIndex == 0) { |
|
195 |
newCell.setCellValue(columnsToCopy.get(col)); // first column must be renamed |
|
196 |
} |
|
197 |
else { |
|
198 |
newCell.setCellValue(cellToString(cell).trim()); |
|
199 |
} |
|
200 |
} |
|
201 |
icol++; |
|
202 |
} |
|
203 |
|
|
204 |
nRowWritten++; |
|
205 |
} |
|
206 |
|
|
207 |
System.out.println("" + nRowWritten + " rows updated."); |
|
208 |
return nRowWritten > 0; |
|
209 |
} |
|
210 |
|
|
211 |
/** |
|
212 |
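* search and replace (regex, String.replaceAll) in the cells of the columns named by the keys of searchAndReplaceRules; each value is a {regex, replacement} pair. |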
|
213 |
* |
|
214 |
* don't forget to save the ReadExcel object |
|
215 |
* |
|
216 |
* @param excel2 |
|
217 |
* @param lineRules |
|
218 |
* @param columnsSelection |
|
219 |
* @return |
|
220 |
* @throws EncryptedDocumentException |
|
221 |
* @throws InvalidFormatException |
|
222 |
* @throws IOException |
|
223 |
*/ |
|
224 |
public boolean searchAndReplaceInLines(HashMap<String, String[]> searchAndReplaceRules) throws EncryptedDocumentException, InvalidFormatException, IOException { |
|
225 |
Row headers = ws.getRow(0); |
|
226 |
if (headers == null) return false; |
|
227 |
|
|
228 |
int colMax = headers.getLastCellNum(); |
|
229 |
int nRows = ws.getPhysicalNumberOfRows(); |
|
230 |
|
|
231 |
HashMap<String, Integer> searchColumns = new HashMap<>(); |
|
232 |
for (int colIndex = 0; colIndex < colMax; colIndex++) { |
|
233 |
Cell cell = headers.getCell(colIndex); |
|
234 |
if (cell != null) { |
|
235 |
String value = cellToString(cell).trim(); |
|
236 |
if (searchAndReplaceRules.containsKey(value)) { |
|
237 |
searchColumns.put(value, colIndex); |
|
238 |
} |
|
239 |
} |
|
240 |
} |
|
241 |
|
|
242 |
if (searchColumns.keySet().size() != searchAndReplaceRules.keySet().size()) { |
|
243 |
System.out.println("Error: not all columns found=" + searchColumns.keySet() + " of " + searchAndReplaceRules.keySet()); |
|
244 |
ArrayList<String> notFound = new ArrayList<>(searchAndReplaceRules.keySet()); |
|
245 |
notFound.removeAll(searchColumns.keySet()); |
|
246 |
System.out.println("NOT FOUND: " + notFound); |
|
247 |
return false; |
|
248 |
} |
|
249 |
|
|
250 |
System.out.println("N Rows to update: " + nRows); |
|
251 |
int nRowWritten = 0; |
|
252 |
|
|
253 |
for (int rowIndex = 0; rowIndex < nRows; rowIndex++) { // update all rows, starting from the second row (first row is the header) |
|
254 |
Row row = ws.getRow(rowIndex); |
|
255 |
if (row == null) continue; |
|
256 |
|
|
257 |
boolean change = false; |
|
258 |
for (String col : searchAndReplaceRules.keySet()) { |
|
259 |
int colIndex = searchColumns.get(col); |
|
260 |
|
|
261 |
Cell cell = row.getCell(colIndex); |
|
262 |
if (cell != null) { |
|
263 |
String value = cellToString(cell).trim(); |
|
264 |
String[] replace = searchAndReplaceRules.get(col); |
|
265 |
String newValue = value.replaceAll(replace[0], replace[1]); |
|
266 |
if (!value.equals(newValue)) { |
|
267 |
cell.setCellValue(newValue); |
|
268 |
change = true; |
|
269 |
} |
|
270 |
} |
|
271 |
} |
|
272 |
|
|
273 |
if (change) { |
|
274 |
nRowWritten++; |
|
275 |
} |
|
276 |
} |
|
277 |
|
|
278 |
System.out.println("" + nRowWritten + " rows updated."); |
|
279 |
|
|
280 |
return true; |
|
281 |
} |
|
282 |
|
|
283 |
/** |
|
144 | 284 |
* one rule (regex test) per column |
145 | 285 |
* all lines are processed |
146 | 286 |
* |
... | ... | |
208 | 348 |
String value = cellToString(cell).trim(); |
209 | 349 |
if (value.matches(columnsTest.get(i))) { |
210 | 350 |
|
211 |
// write new line |
|
212 |
|
|
351 |
// write new line in Sheet ws2 |
|
213 | 352 |
row2 = ws2.createRow(nRowWritten); |
214 | 353 |
nCell = 0; |
215 | 354 |
for (int iCol : columnIdxToWrite) { |
tmp/org.txm.tigersearch.rcp/groovy/org/txm/macro/tiger/exploit/TIGERSVOSummaryMacro.groovy (revision 2931) | ||
---|---|---|
169 | 169 |
} |
170 | 170 |
} |
171 | 171 |
|
172 |
writer.writeLine(lineToWrite) |
|
172 |
def row = writer.writeLine(lineToWrite) |
|
173 |
row.setHeight(50, true) |
|
173 | 174 |
} |
174 | 175 |
} |
175 | 176 |
|
176 | 177 |
writer.save() |
177 | 178 |
cpb.done() |
179 |
println "Results save in "+new_ods_file.getAbsolutePath() |
|
178 | 180 |
|
179 | 181 |
return results |
tmp/org.txm.tigersearch.rcp/src/org/txm/function/tigersearch/TIGERSearch.java (revision 2931) | ||
---|---|---|
254 | 254 |
} |
255 | 255 |
tsresult = tscorpus.query(pQuery); |
256 | 256 |
tsresult.getFirst(); |
257 |
return true; |
|
257 |
|
|
258 |
return tsresult.getNumberOfMatch() > 0; |
|
258 | 259 |
} |
259 | 260 |
|
260 | 261 |
@Override |
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSCorpus.java (revision 2931) | ||
---|---|---|
53 | 53 |
* The Class TSCorpus. |
54 | 54 |
*/ |
55 | 55 |
public class TSCorpus { |
56 |
|
|
56 |
|
|
57 | 57 |
/** The id. */ |
58 | 58 |
String id; |
59 |
|
|
59 |
|
|
60 | 60 |
/** The managers. */ |
61 | 61 |
public TSCorpusManager tsmanager; |
62 |
|
|
62 | 63 |
public InternalCorpusQueryManagerLocal2 manager = null; |
64 |
|
|
63 | 65 |
ExportManager exporter; |
64 |
|
|
66 |
|
|
65 | 67 |
/** The config. */ |
66 | 68 |
TIGERGraphViewerConfiguration config; |
67 |
|
|
69 |
|
|
68 | 70 |
/** The initok. */ |
69 | 71 |
boolean initok = false; |
70 |
|
|
72 |
|
|
71 | 73 |
/** The results. */ |
72 |
HashMap<Integer, TSResult> results = new HashMap<Integer, TSResult>();
|
|
73 |
|
|
74 |
HashMap<Integer, TSResult> results = new HashMap<>(); |
|
75 |
|
|
74 | 76 |
// Additional data for corpus alignment with TXM base corpus (CQP corpus) |
75 | 77 |
RandomAccessFile offsetsRAFile = null; |
78 |
|
|
76 | 79 |
FileChannel offsetsFileChannel = null; |
80 |
|
|
77 | 81 |
MappedByteBuffer offsetsMapped = null; // one offset per tiger position |
82 |
|
|
78 | 83 |
RandomAccessFile presencesRAFile = null; |
84 |
|
|
79 | 85 |
FileChannel presencesFileChannel = null; |
86 |
|
|
80 | 87 |
MappedByteBuffer presencesMapped = null; // one 0/1 boolean per tiger position |
81 |
|
|
88 |
|
|
82 | 89 |
private int[] sentence_starts; |
83 |
|
|
90 |
|
|
84 | 91 |
@Override |
85 | 92 |
public void finalize() { |
86 | 93 |
try { |
87 | 94 |
close(); |
88 |
} catch(Exception e) { |
|
95 |
} |
|
96 |
catch (Exception e) { |
|
89 | 97 |
e.printStackTrace(); |
90 | 98 |
} |
91 | 99 |
} |
... | ... | |
97 | 105 |
* @param tsmanager the tsmanager |
98 | 106 |
*/ |
99 | 107 |
public TSCorpus(String corpusId, TSCorpusManager tsmanager) { |
100 |
|
|
108 |
|
|
101 | 109 |
String regpath = tsmanager.getRegistryPath(); |
102 | 110 |
String confpath = tsmanager.getconfPath(); |
103 | 111 |
try { |
... | ... | |
107 | 115 |
|
108 | 116 |
manager.getQueryProcessor(); |
109 | 117 |
config = new TIGERGraphViewerConfiguration(confpath, confpath, confpath); |
110 |
this.id = corpusId;
|
|
118 |
this.id = corpusId; |
|
111 | 119 |
initok = opencorpus(); |
112 | 120 |
exporter = new ExportManager(manager, ""); //$NON-NLS-1$ |
113 |
|
|
114 |
File offsetsFile = new File(regpath, corpusId+"/offsets.data");
|
|
121 |
|
|
122 |
File offsetsFile = new File(regpath, corpusId + "/offsets.data");
|
|
115 | 123 |
if (offsetsFile.exists()) { |
116 | 124 |
offsetsRAFile = new RandomAccessFile(offsetsFile, "rw"); |
117 | 125 |
offsetsFileChannel = offsetsRAFile.getChannel(); |
118 | 126 |
offsetsMapped = offsetsFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, offsetsFileChannel.size()); |
119 | 127 |
} |
120 |
|
|
121 |
//out.putInt(positions[i]) |
|
122 |
|
|
123 |
File presencesFile = new File(regpath, corpusId+"/presences.data");
|
|
124 |
|
|
128 |
|
|
129 |
// out.putInt(positions[i])
|
|
130 |
|
|
131 |
File presencesFile = new File(regpath, corpusId + "/presences.data");
|
|
132 |
|
|
125 | 133 |
if (presencesFile.exists()) { |
126 | 134 |
presencesRAFile = new RandomAccessFile(presencesFile, "rw"); |
127 | 135 |
presencesFileChannel = presencesRAFile.getChannel(); |
128 | 136 |
presencesMapped = presencesFileChannel.map(FileChannel.MapMode.READ_ONLY, 0, presencesFileChannel.size()); |
129 | 137 |
} |
130 |
} catch (Exception e) { |
|
138 |
} |
|
139 |
catch (Exception e) { |
|
131 | 140 |
// TODO Auto-generated catch block |
132 | 141 |
org.txm.utils.logger.Log.printStackTrace(e); |
133 | 142 |
} |
... | ... | |
136 | 145 |
public String getID() { |
137 | 146 |
return this.id; |
138 | 147 |
} |
139 |
|
|
148 |
|
|
140 | 149 |
public void close() { |
141 | 150 |
try { |
142 | 151 |
if (presencesRAFile != null) presencesRAFile.close(); |
... | ... | |
145 | 154 |
if (offsetsFileChannel != null) offsetsFileChannel.close(); |
146 | 155 |
if (sentence_starts != null) sentence_starts = null; |
147 | 156 |
if (results != null) results.clear(); |
148 |
} catch (IOException e) { |
|
157 |
} |
|
158 |
catch (IOException e) { |
|
149 | 159 |
// TODO Auto-generated catch block |
150 | 160 |
e.printStackTrace(); |
151 | 161 |
} |
152 | 162 |
} |
153 |
|
|
163 |
|
|
154 | 164 |
public int getOffset(int tigerPosition) { |
155 | 165 |
if (offsetsMapped != null) { |
156 |
return offsetsMapped.getInt(tigerPosition*Integer.BYTES); |
|
157 |
} else { |
|
166 |
return offsetsMapped.getInt(tigerPosition * Integer.BYTES); |
|
167 |
} |
|
168 |
else { |
|
158 | 169 |
return 0; |
159 | 170 |
} |
160 | 171 |
} |
161 |
|
|
172 |
|
|
162 | 173 |
public int[] getOffsets(int tigerPositions[]) { |
163 | 174 |
int[] ret = new int[tigerPositions.length]; |
164 | 175 |
if (offsetsMapped != null) { |
165 |
for (int i = 0 ; i < tigerPositions.length ; i++) {
|
|
166 |
ret[i] = offsetsMapped.getInt(tigerPositions[i]*Integer.BYTES);
|
|
176 |
for (int i = 0; i < tigerPositions.length; i++) {
|
|
177 |
ret[i] = offsetsMapped.getInt(tigerPositions[i] * Integer.BYTES);
|
|
167 | 178 |
} |
168 | 179 |
} |
169 |
|
|
180 |
|
|
170 | 181 |
return ret; |
171 | 182 |
} |
172 |
|
|
183 |
|
|
173 | 184 |
public MappedByteBuffer getOffsetsMapped() { |
174 | 185 |
return offsetsMapped; |
175 | 186 |
} |
176 |
|
|
187 |
|
|
177 | 188 |
public MappedByteBuffer getPresencesMapped() { |
178 | 189 |
return presencesMapped; |
179 | 190 |
} |
180 |
|
|
191 |
|
|
181 | 192 |
public int getPresence(int tigerPosition) { |
182 | 193 |
if (presencesMapped != null) { |
183 |
return presencesMapped.getInt(tigerPosition*Integer.BYTES); |
|
184 |
} else { |
|
194 |
return presencesMapped.getInt(tigerPosition * Integer.BYTES); |
|
195 |
} |
|
196 |
else { |
|
185 | 197 |
return 0; |
186 | 198 |
} |
187 | 199 |
} |
188 |
|
|
200 |
|
|
189 | 201 |
public int[] getPresences(int tigerPositions[]) { |
190 | 202 |
int[] ret = new int[tigerPositions.length]; |
191 | 203 |
if (presencesMapped != null) { |
192 |
for (int i = 0 ; i < tigerPositions.length ; i++) {
|
|
193 |
ret[i] = presencesMapped.getInt(tigerPositions[i]*Integer.BYTES);
|
|
204 |
for (int i = 0; i < tigerPositions.length; i++) {
|
|
205 |
ret[i] = presencesMapped.getInt(tigerPositions[i] * Integer.BYTES);
|
|
194 | 206 |
} |
195 | 207 |
} |
196 |
|
|
208 |
|
|
197 | 209 |
return ret; |
198 | 210 |
} |
199 |
|
|
211 |
|
|
200 | 212 |
public static boolean createLogPropFile(File directory) { |
201 | 213 |
directory.mkdirs(); |
202 | 214 |
File logprop = new File(directory, "tigersearch.logprop"); |
203 | 215 |
try { |
204 |
IOUtils.write(logprop, "# Default log configuration of the TIGERSearch suite\n" +
|
|
205 |
"log4j.rootLogger=SEVERE,Logfile\n" +
|
|
206 |
"log4j.logger.ims.tiger.gui.tigersearch.TIGERSearch=SEVERE\n" +
|
|
207 |
"log4j.appender.Logfile=org.apache.log4j.RollingFileAppender\n" +
|
|
208 |
"log4j.appender.Logfile.File=\\${user.home}/tigersearch/tigersearch.log\n" +
|
|
209 |
"log4j.appender.Logfile.MaxFileSize=500KB\n" +
|
|
210 |
"log4j.appender.Logfile.MaxBackupIndex=1\n" +
|
|
211 |
"log4j.appender.Logfile.layout=org.apache.log4j.PatternLayout\n" +
|
|
216 |
IOUtils.write(logprop, "# Default log configuration of the TIGERSearch suite\n" + |
|
217 |
"log4j.rootLogger=SEVERE,Logfile\n" + |
|
218 |
"log4j.logger.ims.tiger.gui.tigersearch.TIGERSearch=SEVERE\n" + |
|
219 |
"log4j.appender.Logfile=org.apache.log4j.RollingFileAppender\n" + |
|
220 |
"log4j.appender.Logfile.File=\\${user.home}/tigersearch/tigersearch.log\n" + |
|
221 |
"log4j.appender.Logfile.MaxFileSize=500KB\n" + |
|
222 |
"log4j.appender.Logfile.MaxBackupIndex=1\n" + |
|
223 |
"log4j.appender.Logfile.layout=org.apache.log4j.PatternLayout\n" + |
|
212 | 224 |
"log4j.appender.Logfile.layout.ConversionPattern=%5r %-5p [%t] %c{2} - %m%n"); |
213 |
} catch (Exception e) { |
|
225 |
} |
|
226 |
catch (Exception e) { |
|
214 | 227 |
// TODO Auto-generated catch block |
215 | 228 |
e.printStackTrace(); |
216 | 229 |
return false; |
217 | 230 |
} |
218 | 231 |
return true; |
219 | 232 |
} |
220 |
|
|
233 |
|
|
221 | 234 |
public void setDisplayProperties(Header header, List<String> tprops, String ntprop) { |
222 |
config.setDisplayedTFeatures(header, tprops);
|
|
235 |
config.setDisplayedTFeatures(header, tprops); |
|
223 | 236 |
config.setDisplayedNTFeature(header, ntprop); |
224 | 237 |
} |
225 |
|
|
226 |
public InternalCorpusQueryManager getInternalManager() |
|
227 |
{ |
|
238 |
|
|
239 |
public InternalCorpusQueryManager getInternalManager() { |
|
228 | 240 |
return manager; |
229 | 241 |
} |
230 |
|
|
231 |
public List<String> getNTFeatures() |
|
232 |
{ |
|
233 |
|
|
242 |
|
|
243 |
public List<String> getNTFeatures() { |
|
244 |
|
|
234 | 245 |
return manager.getHeader().getAllNTFeatureNames(); |
235 |
|
|
246 |
|
|
236 | 247 |
} |
237 |
|
|
238 |
public List<String> getTFeatures() |
|
239 |
{ |
|
248 |
|
|
249 |
public List<String> getTFeatures() { |
|
240 | 250 |
return manager.getHeader().getAllTFeatureNames(); |
241 | 251 |
} |
242 |
|
|
252 |
|
|
243 | 253 |
/** |
244 | 254 |
* contains a lot of information about the corpus |
255 |
* |
|
245 | 256 |
* @return |
246 | 257 |
*/ |
247 |
public Header getHeader() |
|
248 |
{ |
|
258 |
public Header getHeader() { |
|
249 | 259 |
return manager.getHeader(); |
250 | 260 |
} |
251 |
|
|
261 |
|
|
252 | 262 |
/** |
253 | 263 |
* Opencorpus. |
254 | 264 |
* |
255 | 265 |
* @return true, if successful |
256 | 266 |
*/ |
257 |
public boolean opencorpus() |
|
258 |
{ |
|
267 |
public boolean opencorpus() { |
|
259 | 268 |
try { |
260 | 269 |
manager.loadCorpus(id); |
261 |
//manager.getC |
|
262 | 270 |
return true; |
263 | 271 |
} |
264 |
catch (Exception e) { System.out.println(TXMCoreMessages.couldntReadCorpusColon+e.getMessage());} |
|
272 |
catch (Exception e) { |
|
273 |
System.out.println(TXMCoreMessages.couldntReadCorpusColon + e.getMessage()); |
|
274 |
} |
|
265 | 275 |
return false; |
266 | 276 |
} |
267 |
|
|
277 |
|
|
268 | 278 |
/** |
269 | 279 |
* Query. |
270 | 280 |
* |
271 | 281 |
* @param query the query |
272 | 282 |
* @return the tS result |
273 |
* @throws Exception
|
|
283 |
* @throws Exception |
|
274 | 284 |
*/ |
275 |
public TSResult query(String query) throws Exception |
|
276 |
{ |
|
285 |
public TSResult query(String query) throws Exception { |
|
277 | 286 |
return query(query, -1, -1, -1); |
278 | 287 |
} |
279 |
|
|
288 |
|
|
280 | 289 |
/** |
281 | 290 |
* Query. |
282 | 291 |
* |
283 | 292 |
* @param query the query |
284 | 293 |
* @return the tS result |
285 |
* @throws Exception
|
|
294 |
* @throws Exception |
|
286 | 295 |
*/ |
287 |
public TSResult query(String query, int sent_min, int sent_max, int match_max) throws Exception |
|
288 |
{ |
|
296 |
public TSResult query(String query, int sent_min, int sent_max, int match_max) throws Exception { |
|
289 | 297 |
if (results.containsKey(query.hashCode())) |
290 | 298 |
return results.get(query.hashCode()); |
291 |
|
|
299 |
|
|
292 | 300 |
TSResult rez = new TSResult(query, this, sent_min, sent_max, match_max); |
293 | 301 |
results.put(query.hashCode(), rez); |
294 | 302 |
return rez; |
295 | 303 |
} |
296 |
|
|
304 |
|
|
297 | 305 |
/** |
298 | 306 |
* Checks if is ok. |
299 | 307 |
* |
300 | 308 |
* @return true, if is ok |
301 | 309 |
*/ |
302 |
public boolean isOk() {
|
|
310 |
public boolean isOk() { |
|
303 | 311 |
return initok; |
304 | 312 |
} |
305 |
|
|
313 |
|
|
306 | 314 |
public Index getIndex() { |
307 | 315 |
InternalCorpusQueryManagerLocal2 tigermanager = this.manager; |
308 | 316 |
CorpusQueryProcessor processor = tigermanager.getQueryProcessor(); |
309 | 317 |
return processor.getIndex(); |
310 | 318 |
} |
311 |
|
|
319 |
|
|
312 | 320 |
public int[] getSentenceStartPositions() throws IndexException { |
313 | 321 |
if (sentence_starts != null) { |
314 | 322 |
return sentence_starts; |
... | ... | |
316 | 324 |
Index index = getIndex(); |
317 | 325 |
|
318 | 326 |
sentence_starts = new int[index.getNumberOfGraphs()]; |
319 |
for (int i = 0 ; i < index.getNumberOfGraphs() ; i++) {
|
|
327 |
for (int i = 0; i < index.getNumberOfGraphs(); i++) {
|
|
320 | 328 |
sentence_starts[i] = 0; |
321 | 329 |
if (i > 0) { |
322 |
sentence_starts[i] += index.getNumberOfTNodes(i-1) + sentence_starts[i-1];
|
|
330 |
sentence_starts[i] += index.getNumberOfTNodes(i - 1) + sentence_starts[i - 1];
|
|
323 | 331 |
} |
324 | 332 |
} |
325 | 333 |
|
... | ... | |
333 | 341 |
public TSProperty getNTProperty(String name) { |
334 | 342 |
return new TSProperty(this, name, false); |
335 | 343 |
} |
336 |
} |
|
344 |
} |
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSResult.java (revision 2931) | ||
---|---|---|
54 | 54 |
* The Class TSResult. |
55 | 55 |
*/ |
56 | 56 |
public class TSResult { |
57 |
|
|
57 |
|
|
58 | 58 |
/** The result. */ |
59 | 59 |
MatchResult result; |
60 |
|
|
60 |
|
|
61 | 61 |
/** The forest. */ |
62 | 62 |
ResultForest forest; |
63 |
|
|
63 |
|
|
64 | 64 |
/** The header. */ |
65 | 65 |
Header header; |
66 |
|
|
66 |
|
|
67 | 67 |
/** The ts corpus. */ |
68 | 68 |
TSCorpus tsCorpus; |
69 |
|
|
69 |
|
|
70 | 70 |
/** The matches. */ |
71 | 71 |
TSMatch[] matches; |
72 |
|
|
73 |
// /** The current match. */
|
|
74 |
// int currentMatchNo = 0;
|
|
75 |
// TSMatch currentMatch;
|
|
76 |
|
|
72 |
|
|
73 |
// /** The current match. */
|
|
74 |
// int currentMatchNo = 0;
|
|
75 |
// TSMatch currentMatch;
|
|
76 |
|
|
77 | 77 |
/** The query. */ |
78 | 78 |
private String query; |
79 |
|
|
79 |
|
|
80 | 80 |
/** The manager. */ |
81 | 81 |
private InternalCorpusQueryManager manager; |
82 |
|
|
82 |
|
|
83 | 83 |
/** The corpus. */ |
84 |
//private TSCorpus corpus; |
|
85 |
|
|
84 |
// private TSCorpus corpus;
|
|
85 |
|
|
86 | 86 |
/** The tsquerytosvg. */ |
87 | 87 |
QueryToSvg tsquerytosvg = null; |
88 |
|
|
88 |
|
|
89 | 89 |
/** |
90 | 90 |
* Instantiates a new tS result. |
91 | 91 |
* |
92 | 92 |
* @param query the query |
93 | 93 |
* @param tsCorpus the ts corpus |
94 |
* @throws QueryFilterException
|
|
95 |
* @throws QueryOptimizationException
|
|
96 |
* @throws QueryIndexException
|
|
97 |
* @throws QueryEvaluationException
|
|
98 |
* @throws QueryNormalizationException
|
|
99 |
* @throws Exception
|
|
94 |
* @throws QueryFilterException |
|
95 |
* @throws QueryOptimizationException |
|
96 |
* @throws QueryIndexException |
|
97 |
* @throws QueryEvaluationException |
|
98 |
* @throws QueryNormalizationException |
|
99 |
* @throws Exception |
|
100 | 100 |
*/ |
101 | 101 |
public TSResult(String query, TSCorpus tsCorpus, int sent_min, int sent_max, int match_max) throws Exception { |
102 |
|
|
102 |
|
|
103 | 103 |
this.query = query; |
104 | 104 |
this.manager = tsCorpus.manager; |
105 |
//this.corpus = tsCorpus; |
|
105 |
// this.corpus = tsCorpus;
|
|
106 | 106 |
this.tsCorpus = tsCorpus; |
107 |
|
|
107 |
|
|
108 | 108 |
result = tsCorpus.manager.processQuery(query, sent_min, sent_max, match_max); |
109 | 109 |
|
110 | 110 |
if (result.size() > 0) { |
111 | 111 |
forest = new ResultForest(result, tsCorpus.manager); |
112 | 112 |
header = forest.getHeader(); |
113 |
|
|
113 |
|
|
114 | 114 |
matches = new TSMatch[result.size()]; |
115 | 115 |
tsquerytosvg = new QueryToSvg(manager, result, forest, header, tsCorpus.config); |
116 | 116 |
} |
117 |
else { |
|
118 |
matches = new TSMatch[0]; |
|
119 |
} |
|
117 | 120 |
} |
118 |
|
|
121 |
|
|
119 | 122 |
public ResultForest getForest() { |
120 | 123 |
return forest; |
121 | 124 |
} |
122 |
|
|
125 |
|
|
123 | 126 |
public MatchResult getMatchResult() { |
124 | 127 |
return result; |
125 | 128 |
} |
126 |
|
|
129 |
|
|
127 | 130 |
public InternalCorpusQueryManager getManager() { |
128 | 131 |
return manager; |
129 | 132 |
} |
130 |
|
|
133 |
|
|
131 | 134 |
/** |
132 | 135 |
* Gets the number of match. |
133 | 136 |
* |
134 | 137 |
* @return the number of match |
135 | 138 |
*/ |
136 |
public int getNumberOfMatch() |
|
137 |
{ |
|
139 |
public int getNumberOfMatch() { |
|
138 | 140 |
return result.size(); |
139 | 141 |
} |
140 |
|
|
141 |
// /**
|
|
142 |
// * return the no of match, begins with 1.
|
|
143 |
// *
|
|
144 |
// * @return the current match no
|
|
145 |
// */
|
|
146 |
// public int getCurrentMatchNo() {
|
|
147 |
// return currentMatchNo;
|
|
148 |
// }
|
|
149 |
// |
|
150 |
// /**
|
|
151 |
// * return the no of match, begins with 1.
|
|
152 |
// *
|
|
153 |
// * @return the current match no
|
|
154 |
// */
|
|
155 |
// public TSMatch getCurrentMatch() {
|
|
156 |
// return currentMatch;
|
|
157 |
// }
|
|
158 |
|
|
159 |
// /**
|
|
160 |
// * return the no of match, begins with 1.
|
|
161 |
// *
|
|
162 |
// * @return the current sentence no
|
|
163 |
// */
|
|
164 |
// public int getCurrentSentenceNo() {
|
|
165 |
// System.out.println(TXMCoreMessages.numberOfMatch+this.getNumberOfMatch());
|
|
166 |
// System.out.println("current match "+this.getCurrentMatchNo()); //$NON-NLS-1$
|
|
167 |
// System.out.println(TXMCoreMessages.numberOfSubMatch+this.getCurrentMatch().getNumberOfSubGraph());
|
|
168 |
// System.out.println("current sub graph "+this.getCurrentMatch().getCurrentSubMatchNo()); //$NON-NLS-1$
|
|
169 |
// return result.getSentenceNumberAt(this.currentMatchNo); // + 1 ?
|
|
170 |
// }
|
|
171 |
|
|
142 |
|
|
143 |
// /**
|
|
144 |
// * return the no of match, begins with 1.
|
|
145 |
// *
|
|
146 |
// * @return the current match no
|
|
147 |
// */
|
|
148 |
// public int getCurrentMatchNo() {
|
|
149 |
// return currentMatchNo;
|
|
150 |
// }
|
|
151 |
//
|
|
152 |
// /**
|
|
153 |
// * return the no of match, begins with 1.
|
|
154 |
// *
|
|
155 |
// * @return the current match no
|
|
156 |
// */
|
|
157 |
// public TSMatch getCurrentMatch() {
|
|
158 |
// return currentMatch;
|
|
159 |
// }
|
|
160 |
|
|
161 |
// /**
|
|
162 |
// * return the no of match, begins with 1.
|
|
163 |
// *
|
|
164 |
// * @return the current sentence no
|
|
165 |
// */
|
|
166 |
// public int getCurrentSentenceNo() {
|
|
167 |
// System.out.println(TXMCoreMessages.numberOfMatch+this.getNumberOfMatch());
|
|
168 |
// System.out.println("current match "+this.getCurrentMatchNo()); //$NON-NLS-1$
|
|
169 |
// System.out.println(TXMCoreMessages.numberOfSubMatch+this.getCurrentMatch().getNumberOfSubGraph());
|
|
170 |
// System.out.println("current sub graph "+this.getCurrentMatch().getCurrentSubMatchNo()); //$NON-NLS-1$
|
|
171 |
// return result.getSentenceNumberAt(this.currentMatchNo); // + 1 ?
|
|
172 |
// }
|
|
173 |
|
|
172 | 174 |
/** |
173 | 175 |
* Gets the match. |
174 | 176 |
* |
175 | 177 |
* @param matchNo the no |
176 | 178 |
* @return the match |
177 | 179 |
*/ |
178 |
public TSMatch getMatch(int matchNo) |
|
179 |
{ |
|
180 |
public TSMatch getMatch(int matchNo) { |
|
180 | 181 |
if (matches[matchNo] == null) { |
181 | 182 |
TSMatch m = new TSMatch(matchNo, this.tsquerytosvg, this); |
182 | 183 |
matches[matchNo] = m; |
183 | 184 |
} |
184 |
|
|
185 |
return matches[matchNo] ;
|
|
185 |
|
|
186 |
return matches[matchNo]; |
|
186 | 187 |
} |
187 |
|
|
188 |
|
|
188 | 189 |
public void setDisplayProperties(List<String> tprops, String feature) { |
189 | 190 |
tsCorpus.setDisplayProperties(this.header, tprops, feature); |
190 | 191 |
} |
191 |
|
|
192 |
|
|
192 | 193 |
/** |
193 | 194 |
* Gets the first. |
194 | 195 |
* |
195 | 196 |
* @return the first |
196 | 197 |
*/ |
197 |
public TSMatch getFirst() |
|
198 |
{ |
|
198 |
public TSMatch getFirst() { |
|
199 | 199 |
if (result.size() > 0) { |
200 |
// currentMatchNo = 0;
|
|
201 |
// currentMatch = getMatch(0);
|
|
200 |
// currentMatchNo = 0;
|
|
201 |
// currentMatch = getMatch(0);
|
|
202 | 202 |
return getMatch(0); |
203 |
} else { |
|
203 |
} |
|
204 |
else { |
|
204 | 205 |
return null; |
205 | 206 |
} |
206 | 207 |
} |
207 |
|
|
208 |
|
|
208 | 209 |
/** |
209 | 210 |
* Gets the last. |
210 | 211 |
* |
211 | 212 |
* @return the last |
212 | 213 |
*/ |
213 |
public TSMatch getLast() |
|
214 |
{ |
|
214 |
public TSMatch getLast() { |
|
215 | 215 |
if (result.size() > 0) { |
216 |
// currentMatchNo = result.size() -1; |
|
217 |
// currentMatch = getMatch(result.size() -1); |
|
218 |
return getMatch(result.size() -1); |
|
219 |
} else { |
|
216 |
// currentMatchNo = result.size() -1; |
|
217 |
// currentMatch = getMatch(result.size() -1); |
|
218 |
return getMatch(result.size() - 1); |
|
219 |
} |
|
220 |
else { |
|
220 | 221 |
return null; |
221 | 222 |
} |
222 | 223 |
} |
223 |
// |
|
224 |
// /** |
|
225 |
// * Gets the next. |
|
226 |
// * |
|
227 |
// * @return the next |
|
228 |
// */ |
|
229 |
// public TSMatch getNext() |
|
230 |
// { |
|
231 |
// int next = currentMatchNo + 1; |
|
232 |
// if (result.size() > next) { |
|
233 |
// currentMatchNo = next; |
|
234 |
// currentMatch = getMatch(next); |
|
235 |
// return currentMatch; |
|
236 |
// } else { |
|
237 |
// return null; |
|
238 |
// } |
|
239 |
// } |
|
240 |
// |
|
241 |
// /** |
|
242 |
// * Gets the previous. |
|
243 |
// * |
|
244 |
// * @return the previous |
|
245 |
// */ |
|
246 |
// public TSMatch getPrevious() |
|
247 |
// { |
|
248 |
// int next = currentMatchNo - 1; |
|
249 |
// if (next >= 0 && result.size() > 0) { |
|
250 |
// currentMatchNo = next; |
|
251 |
// currentMatch = getMatch(next); |
|
252 |
// return currentMatch; |
|
253 |
// } else { |
|
254 |
// return null; |
|
255 |
// } |
|
256 |
// } |
|
257 |
// |
|
258 |
// public TSMatch setCurrentMatch(int graphNo) { |
|
259 |
// currentMatch = this.getMatch(graphNo); |
|
260 |
// currentMatchNo = graphNo; |
|
261 |
// return currentMatch; |
|
262 |
// } |
|
263 |
|
|
264 |
public boolean toXml(File outfile, File xmlFile, File xslFile) throws ExportException, ExportStopException, IOException, TransformerException |
|
265 |
{ |
|
224 |
// |
|
225 |
// /** |
|
226 |
// * Gets the next. |
|
227 |
// * |
|
228 |
// * @return the next |
|
229 |
// */ |
|
230 |
// public TSMatch getNext() |
|
231 |
// { |
|
232 |
// int next = currentMatchNo + 1; |
|
233 |
// if (result.size() > next) { |
|
234 |
// currentMatchNo = next; |
|
235 |
// currentMatch = getMatch(next); |
|
236 |
// return currentMatch; |
|
237 |
// } else { |
|
238 |
// return null; |
|
239 |
// } |
|
240 |
// } |
|
241 |
// |
|
242 |
// /** |
|
243 |
// * Gets the previous. |
|
244 |
// * |
|
245 |
// * @return the previous |
|
246 |
// */ |
|
247 |
// public TSMatch getPrevious() |
|
248 |
// { |
|
249 |
// int next = currentMatchNo - 1; |
|
250 |
// if (next >= 0 && result.size() > 0) { |
|
251 |
// currentMatchNo = next; |
|
252 |
// currentMatch = getMatch(next); |
|
253 |
// return currentMatch; |
|
254 |
// } else { |
|
255 |
// return null; |
|
256 |
// } |
|
257 |
// } |
|
258 |
// |
|
259 |
// public TSMatch setCurrentMatch(int graphNo) { |
|
260 |
// currentMatch = this.getMatch(graphNo); |
|
261 |
// currentMatchNo = graphNo; |
|
262 |
// return currentMatch; |
|
263 |
// } |
|
264 |
|
|
265 |
public boolean toXml(File outfile, File xmlFile, File xslFile) throws ExportException, ExportStopException, IOException, TransformerException { |
|
266 | 266 |
return toXml(outfile, xmlFile, xslFile, false, 30, new ArrayList<String>(), new ArrayList<String>()); |
267 | 267 |
} |
268 |
|
|
269 |
//TODO move this code somewhere |
|
270 |
// public static String CONCSIMPLE = "concordance_simple"; //$NON-NLS-1$
|
|
271 |
// public static String CONCMOTPIVOT = "concordance_mot-pivot"; //$NON-NLS-1$
|
|
272 |
// public static String CONCBLOCKS = "concordance_blocks"; //$NON-NLS-1$
|
|
273 |
// public static String[] EXPORTMETHODS = {CONCSIMPLE, CONCMOTPIVOT, CONCBLOCKS};
|
|
274 |
// public boolean toConcordance(File csvFile, String method, int cx, List<String> list, List<String> list2, boolean punct) throws Exception
|
|
275 |
// {
|
|
276 |
// if (!Arrays.asList(EXPORTMETHODS).contains(method)) {
|
|
277 |
// Log.severe(TXMCoreMessages.TSResult_7+method+TXMCoreMessages.TSResult_8+Arrays.toString(EXPORTMETHODS));
|
|
278 |
// return false;
|
|
279 |
// }
|
|
268 |
|
|
269 |
// TODO move this code somewhere
|
|
270 |
// public static String CONCSIMPLE = "concordance_simple"; //$NON-NLS-1$
|
|
271 |
// public static String CONCMOTPIVOT = "concordance_mot-pivot"; //$NON-NLS-1$
|
|
272 |
// public static String CONCBLOCKS = "concordance_blocks"; //$NON-NLS-1$
|
|
273 |
// public static String[] EXPORTMETHODS = {CONCSIMPLE, CONCMOTPIVOT, CONCBLOCKS};
|
|
274 |
// public boolean toConcordance(File csvFile, String method, int cx, List<String> list, List<String> list2, boolean punct) throws Exception
|
|
275 |
// {
|
|
276 |
// if (!Arrays.asList(EXPORTMETHODS).contains(method)) {
|
|
277 |
// Log.severe(TXMCoreMessages.TSResult_7+method+TXMCoreMessages.TSResult_8+Arrays.toString(EXPORTMETHODS));
|
|
278 |
// return false;
|
|
279 |
// }
|
|
280 | 280 |
// |
281 |
// File xmlFile = File.createTempFile(csvFile.getName(), "EXPORTBRUT.xml", csvFile.getParentFile()); //$NON-NLS-1$
|
|
282 |
// boolean rez = false;
|
|
283 |
// if (punct) {
|
|
284 |
// // export match
|
|
285 |
// //System.out.println("save matches in "+xmlFile);
|
|
286 |
// this.toXml(xmlFile, false, true, false, false, true, false, 0);
|
|
287 |
// //FileCopy.copy(xmlFile, new File(xmlFile.getParentFile(), "EXPORTBRUT.xml"));
|
|
288 |
// // merge with TigerXMLPOSPNC
|
|
289 |
// File tmp = File.createTempFile("txm", "AFTERMINJECT.xml", xmlFile.getParentFile()); //$NON-NLS-1$ //$NON-NLS-2$
|
|
290 |
// File tigerXml = new File(tsCorpus.tsmanager.getRegistryPath(), "TigerPnc.xml"); //$NON-NLS-1$
|
|
291 |
// //System.out.println("TIGER XML: "+tigerXml);
|
|
292 |
// if (!tigerXml.exists()) {
|
|
293 |
// System.out.println(TXMCoreMessages.TSResult_13+tigerXml.getAbsolutePath());
|
|
294 |
// return false;
|
|
295 |
// }
|
|
296 |
// //System.out.println("Match inject: in "+tmp);
|
|
297 |
// new MatchInject().script(tigerXml, xmlFile, tmp);
|
|
298 |
// xmlFile.delete();
|
|
299 |
// tmp.renameTo(xmlFile);
|
|
300 |
// //FileCopy.copy(xmlFile, new File(xmlFile.getParentFile(), "AFTERMINJECT.xml"));
|
|
301 |
// } else {
|
|
302 |
// this.toXml(xmlFile); // export match + corpus
|
|
303 |
// //FileCopy.copy(xmlFile, new File(xmlFile.getParentFile(), "FULLEXPORT.xml"));
|
|
304 |
// }
|
|
281 |
// File xmlFile = File.createTempFile(csvFile.getName(), "EXPORTBRUT.xml", csvFile.getParentFile()); //$NON-NLS-1$
|
|
282 |
// boolean rez = false;
|
|
283 |
// if (punct) {
|
|
284 |
// // export match
|
|
285 |
// //System.out.println("save matches in "+xmlFile);
|
|
286 |
// this.toXml(xmlFile, false, true, false, false, true, false, 0);
|
|
287 |
// //FileCopy.copy(xmlFile, new File(xmlFile.getParentFile(), "EXPORTBRUT.xml"));
|
|
288 |
// // merge with TigerXMLPOSPNC
|
|
289 |
// File tmp = File.createTempFile("txm", "AFTERMINJECT.xml", xmlFile.getParentFile()); //$NON-NLS-1$ //$NON-NLS-2$
|
|
290 |
// File tigerXml = new File(tsCorpus.tsmanager.getRegistryPath(), "TigerPnc.xml"); //$NON-NLS-1$
|
|
291 |
// //System.out.println("TIGER XML: "+tigerXml);
|
|
292 |
// if (!tigerXml.exists()) {
|
|
293 |
// System.out.println(TXMCoreMessages.TSResult_13+tigerXml.getAbsolutePath());
|
|
294 |
// return false;
|
|
295 |
// }
|
|
296 |
// //System.out.println("Match inject: in "+tmp);
|
|
297 |
// new MatchInject().script(tigerXml, xmlFile, tmp);
|
|
298 |
// xmlFile.delete();
|
|
299 |
// tmp.renameTo(xmlFile);
|
|
300 |
// //FileCopy.copy(xmlFile, new File(xmlFile.getParentFile(), "AFTERMINJECT.xml"));
|
|
301 |
// } else {
|
|
302 |
// this.toXml(xmlFile); // export match + corpus
|
|
303 |
// //FileCopy.copy(xmlFile, new File(xmlFile.getParentFile(), "FULLEXPORT.xml"));
|
|
304 |
// }
|
|
305 | 305 |
// |
306 |
// if (!xmlFile.exists()) { System.out.println(TXMCoreMessages.TSResult_14); return false;}
|
|
306 |
// if (!xmlFile.exists()) { System.out.println(TXMCoreMessages.TSResult_14); return false;}
|
|
307 | 307 |
// |
308 |
// if (method.equals("concordance_blocks")) { //$NON-NLS-1$
|
|
309 |
// ConcordanceBlocks builder = new ConcordanceBlocks();
|
|
310 |
// rez = builder.process(xmlFile, csvFile, cx, list, list2);
|
|
311 |
// } else { // XSL method
|
|
312 |
// File xslDir = new File(TBXPreferences.getInstance().getString(TBXPreferences.USER_TXM_HOME), "xsl"); //$NON-NLS-1$
|
|
313 |
// File xslFile = new File(xslDir, method+".xsl"); //$NON-NLS-1$
|
|
314 |
// if (!xslFile.exists()) {
|
|
315 |
// Log.severe(TXMCoreMessages.TSResult_7+xslFile);
|
|
316 |
// return false;
|
|
317 |
// }
|
|
308 |
// if (method.equals("concordance_blocks")) { //$NON-NLS-1$
|
|
309 |
// ConcordanceBlocks builder = new ConcordanceBlocks();
|
|
310 |
// rez = builder.process(xmlFile, csvFile, cx, list, list2);
|
|
311 |
// } else { // XSL method
|
|
312 |
// File xslDir = new File(TBXPreferences.getInstance().getString(TBXPreferences.USER_TXM_HOME), "xsl"); //$NON-NLS-1$
|
|
313 |
// File xslFile = new File(xslDir, method+".xsl"); //$NON-NLS-1$
|
|
314 |
// if (!xslFile.exists()) {
|
|
315 |
// Log.severe(TXMCoreMessages.TSResult_7+xslFile);
|
|
316 |
// return false;
|
|
317 |
// }
|
|
318 | 318 |
// |
319 |
// rez = toXml(csvFile, xmlFile, xslFile, punct, cx, list, list2); |
|
320 |
// } |
|
321 |
// xmlFile.delete(); // no more needed |
|
322 |
// return rez; |
|
323 |
// } |
|
324 |
|
|
325 |
public boolean toXml(File outfile) throws ExportException, ExportStopException |
|
326 |
{ |
|
319 |
// rez = toXml(csvFile, xmlFile, xslFile, punct, cx, list, list2); |
|
320 |
// } |
|
321 |
// xmlFile.delete(); // no more needed |
|
322 |
// return rez; |
|
323 |
// } |
|
324 |
|
|
325 |
public boolean toXml(File outfile) throws ExportException, ExportStopException { |
|
327 | 326 |
toXml(outfile, true, true); |
328 | 327 |
return true; |
329 | 328 |
} |
330 |
|
|
331 |
public boolean toXml(File outfile, boolean includeNonMatch, boolean includeMatch, |
|
332 |
boolean includeXmlHeader, boolean includeXMLSentenceStructure, |
|
333 |
boolean includeXMLMatchInformation, boolean refineSchema, int referSchema) throws ExportException, ExportStopException |
|
334 |
{ |
|
329 |
|
|
330 |
public boolean toXml(File outfile, boolean includeNonMatch, boolean includeMatch, |
|
331 |
boolean includeXmlHeader, boolean includeXMLSentenceStructure, |
|
332 |
boolean includeXMLMatchInformation, boolean refineSchema, int referSchema) throws ExportException, ExportStopException { |
|
335 | 333 |
this.tsCorpus.exporter.setConfiguration(includeXmlHeader, |
336 | 334 |
includeXMLSentenceStructure, |
337 | 335 |
includeXMLMatchInformation, |
... | ... | |
340 | 338 |
this.tsCorpus.exporter.saveMatchAsXML(result, outfile, includeNonMatch, includeMatch); |
341 | 339 |
return true; |
342 | 340 |
} |
343 |
|
|
344 |
public boolean toXml(File outfile, boolean includeNonMatch, boolean includeMatch) throws ExportException, ExportStopException |
|
345 |
{ |
|
341 |
|
|
342 |
public boolean toXml(File outfile, boolean includeNonMatch, boolean includeMatch) throws ExportException, ExportStopException { |
|
346 | 343 |
boolean includeXmlHeader = true; |
347 | 344 |
boolean includeXMLSentenceStructure = true; |
348 | 345 |
boolean includeXMLMatchInformation = true; |
349 | 346 |
boolean refineSchema = false; |
350 | 347 |
int referSchema = 0; |
351 |
|
|
348 |
|
|
352 | 349 |
return toXml(outfile, includeNonMatch, includeMatch, includeXmlHeader, includeXMLSentenceStructure, includeXMLMatchInformation, refineSchema, referSchema); |
353 | 350 |
} |
354 |
|
|
355 |
// @Deprecated
|
|
356 |
// private void injectPunct(File xmlfile)
|
|
357 |
// {
|
|
358 |
// System.out.println(TXMCoreMessages.TSResult_18);
|
|
359 |
// String corpus = this.tsCorpus.id.toUpperCase();
|
|
360 |
// try {
|
|
361 |
// Document dom = DomUtils.load(xmlfile);
|
|
362 |
// //System.out.println("Getting words of "+corpus);
|
|
363 |
// Object words = PunctInject.getWords(corpus, ""); //$NON-NLS-1$
|
|
351 |
|
|
352 |
// @Deprecated
|
|
353 |
// private void injectPunct(File xmlfile)
|
|
354 |
// {
|
|
355 |
// System.out.println(TXMCoreMessages.TSResult_18);
|
|
356 |
// String corpus = this.tsCorpus.id.toUpperCase();
|
|
357 |
// try {
|
|
358 |
// Document dom = DomUtils.load(xmlfile);
|
|
359 |
// //System.out.println("Getting words of "+corpus);
|
|
360 |
// Object words = PunctInject.getWords(corpus, ""); //$NON-NLS-1$
|
|
364 | 361 |
// |
365 |
// // int i = 0;
|
|
366 |
// // for(String[] word : (ArrayList<String[]>)words)
|
|
367 |
// // {
|
|
368 |
// // if(i++ % 10 == 0) System.out.println();
|
|
369 |
// // System.out.print("[\""+word[0]+"\", \""+word[1].replace("\"", "\\\"")+"\"], ");
|
|
370 |
// // }
|
|
371 |
// File outfile = File.createTempFile("punct", ".xml", xmlfile.getParentFile()); //$NON-NLS-1$ //$NON-NLS-2$
|
|
372 |
// //System.out.println("Processing "+xmlfile+" to "+outfile);
|
|
373 |
// Document doc = (Document) new PunctInject().process(dom, words);
|
|
374 |
// //System.out.println("Saving file");
|
|
375 |
// DomUtils.save(doc, outfile);
|
|
362 |
// // int i = 0;
|
|
363 |
// // for(String[] word : (ArrayList<String[]>)words)
|
|
364 |
// // {
|
|
365 |
// // if(i++ % 10 == 0) System.out.println();
|
|
366 |
// // System.out.print("[\""+word[0]+"\", \""+word[1].replace("\"", "\\\"")+"\"], ");
|
|
367 |
// // }
|
|
368 |
// File outfile = File.createTempFile("punct", ".xml", xmlfile.getParentFile()); //$NON-NLS-1$ //$NON-NLS-2$
|
|
369 |
// //System.out.println("Processing "+xmlfile+" to "+outfile);
|
|
370 |
// Document doc = (Document) new PunctInject().process(dom, words);
|
|
371 |
// //System.out.println("Saving file");
|
|
372 |
// DomUtils.save(doc, outfile);
|
|
376 | 373 |
// |
377 |
// // FileCopy.copy(outfile, new File(outfile.getParentFile(), "afterinject.xml"));
|
|
374 |
// // FileCopy.copy(outfile, new File(outfile.getParentFile(), "afterinject.xml"));
|
|
378 | 375 |
// |
379 |
// xmlfile.delete();
|
|
380 |
// outfile.renameTo(xmlfile);
|
|
381 |
// } catch (UnsupportedEncodingException e) {
|
|
382 |
// // TODO Auto-generated catch block
|
|
383 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
384 |
// } catch (FileNotFoundException e) {
|
|
385 |
// // TODO Auto-generated catch block
|
|
386 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
387 |
// } catch (ParserConfigurationException e) {
|
|
388 |
// // TODO Auto-generated catch block
|
|
389 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
390 |
// } catch (SAXException e) {
|
|
391 |
// // TODO Auto-generated catch block
|
|
392 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
393 |
// } catch (IOException e) {
|
|
394 |
// // TODO Auto-generated catch block
|
|
395 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
396 |
// }
|
|
376 |
// xmlfile.delete();
|
|
377 |
// outfile.renameTo(xmlfile);
|
|
378 |
// } catch (UnsupportedEncodingException e) {
|
|
379 |
// // TODO Auto-generated catch block
|
|
380 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
381 |
// } catch (FileNotFoundException e) {
|
|
382 |
// // TODO Auto-generated catch block
|
|
383 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
384 |
// } catch (ParserConfigurationException e) {
|
|
385 |
// // TODO Auto-generated catch block
|
|
386 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
387 |
// } catch (SAXException e) {
|
|
388 |
// // TODO Auto-generated catch block
|
|
389 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
390 |
// } catch (IOException e) {
|
|
391 |
// // TODO Auto-generated catch block
|
|
392 |
// org.txm.utils.logger.Log.printStackTrace(e);
|
|
393 |
// }
|
|
397 | 394 |
// |
398 | 395 |
// |
399 |
// }
|
|
400 |
|
|
401 |
public boolean toXml(File outFile, File xmlFile, File xslFile, boolean punct, int cxsize, List<String> list, List<String> list2) throws ExportException, ExportStopException, IOException, TransformerException
|
|
402 |
{ |
|
396 |
// }
|
|
397 |
|
|
398 |
public boolean toXml(File outFile, File xmlFile, File xslFile, boolean punct, int cxsize, List<String> list, List<String> list2) throws ExportException, ExportStopException, IOException, |
|
399 |
TransformerException {
|
|
403 | 400 |
ApplyXsl2 xslProc = new ApplyXsl2(xslFile); |
404 | 401 |
xslProc.setParam("cx", cxsize); //$NON-NLS-1$ |
405 | 402 |
xslProc.setParam("ntTypes", list.toString().replaceAll("[\\[\\],]", "")); // ["pos", "lem"] //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
406 | 403 |
xslProc.setParam("tTypes", list2.toString().replaceAll("[\\[\\],]", "")); // ["truc", "machin"] //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ |
407 | 404 |
if (xslProc.process(xmlFile, outFile)) { |
408 | 405 |
return outFile.exists(); |
409 |
} else { |
|
406 |
} |
|
407 |
else { |
|
410 | 408 |
return false; |
411 | 409 |
} |
412 | 410 |
} |
413 |
} |
|
411 |
} |
tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/editors/TIGERSearchEditor.java (revision 2931) | ||
---|---|---|
310 | 310 |
// this.syncExec(new Runnable() { |
311 | 311 |
// @Override |
312 | 312 |
// public void run() { |
313 |
if (ts.getNSentences() > 0) { |
|
314 |
sentCounterLabel.setText("/" + ts.getNSentences()); |
|
315 |
subCounterLabel.setText("/" + ts.getNSubGraph(0)); |
|
316 |
sentSpinner.setSelection(1); |
|
317 |
sentSpinner.setMaximum(ts.getNSentences()); |
|
318 |
subSpinner.setSelection(1); |
|
319 |
subSpinner.setMaximum(ts.getNSubGraph(0)); |
|
320 |
|
|
321 |
reloadGraph(); |
|
322 |
} |
|
313 | 323 |
|
314 |
sentCounterLabel.setText("/" + ts.getNSentences()); |
|
315 |
subCounterLabel.setText("/" + ts.getNSubGraph(0)); |
|
316 |
sentSpinner.setSelection(1); |
|
317 |
sentSpinner.setMaximum(ts.getNSentences()); |
|
318 |
subSpinner.setSelection(1); |
|
319 |
subSpinner.setMaximum(ts.getNSubGraph(0)); |
|
320 |
|
|
321 |
reloadGraph(); |
|
322 |
|
|
323 | 324 |
CorporaView.refresh(); |
324 | 325 |
CorporaView.expand(corpus); |
325 | 326 |
QueriesView.refresh(); |
tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/commands/ImportTIGERAnnotations.java (revision 2931) | ||
---|---|---|
32 | 32 |
import java.io.RandomAccessFile; |
33 | 33 |
import java.nio.MappedByteBuffer; |
34 | 34 |
import java.nio.channels.FileChannel; |
35 |
import java.util.Arrays; |
|
36 | 35 |
|
36 |
import org.apache.log4j.BasicConfigurator; |
|
37 | 37 |
import org.eclipse.core.commands.AbstractHandler; |
38 | 38 |
import org.eclipse.core.commands.ExecutionEvent; |
39 | 39 |
import org.eclipse.core.commands.ExecutionException; |
... | ... | |
52 | 52 |
import org.txm.searchengine.ts.InternalCorpusQueryManagerLocal2; |
53 | 53 |
import org.txm.searchengine.ts.TSCorpus; |
54 | 54 |
import org.txm.searchengine.ts.TSCorpusManager; |
55 |
import org.txm.utils.ConsoleProgressBar; |
|
55 | 56 |
import org.txm.utils.DeleteDir; |
56 | 57 |
import org.txm.utils.io.FileCopy; |
58 |
import org.txm.utils.io.IOUtils; |
|
57 | 59 |
import org.txm.utils.logger.Log; |
58 | 60 |
|
59 | 61 |
import ims.tiger.corpus.Sentence; |
60 | 62 |
import ims.tiger.corpus.T_Node; |
61 | 63 |
import ims.tiger.index.reader.Index; |
62 | 64 |
import ims.tiger.index.reader.IndexException; |
65 |
import ims.tiger.index.writer.IndexBuilderErrorHandler; |
|
66 |
import ims.tiger.index.writer.SimpleErrorHandler; |
|
67 |
import ims.tiger.index.writer.XMLIndexing; |
|
63 | 68 |
import ims.tiger.query.api.QueryIndexException; |
64 | 69 |
import ims.tiger.query.processor.CorpusQueryProcessor; |
65 | 70 |
|
66 |
|
|
67 |
// TODO: Auto-generated Javadoc |
|
68 | 71 |
/** |
69 |
* open the TIGERSearch Editor |
|
72 |
* Import TIGERSearch annotations into a TXM corpus |
|
73 |
* |
|
74 |
* If the corpus already contains TIGER annotations, they are replaced |
|
75 |
* |
|
76 |
* The annotations are given using a TIGERSearch binary corpus OR a TIGER source directory using a "main.xml" file |
|
77 |
* |
|
70 | 78 |
* @author mdecorde. |
71 | 79 |
*/ |
72 | 80 |
public class ImportTIGERAnnotations extends AbstractHandler { |
73 |
|
|
81 |
|
|
74 | 82 |
public static final String ID = "org.txm.rcp.commands.function.ComputeTSIndex"; //$NON-NLS-1$ |
75 |
|
|
76 |
/* (non-Javadoc) |
|
83 |
|
|
84 |
/* |
|
85 |
* (non-Javadoc) |
|
77 | 86 |
* @see org.eclipse.core.commands.AbstractHandler#execute(org.eclipse.core.commands.ExecutionEvent) |
78 | 87 |
*/ |
79 | 88 |
@Override |
80 | 89 |
public Object execute(final ExecutionEvent event) throws ExecutionException { |
81 |
|
|
90 |
|
|
82 | 91 |
IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event); |
83 |
|
|
92 |
|
|
84 | 93 |
Object s = selection.getFirstElement(); |
85 |
if (s instanceof CQPCorpus) {
|
|
86 |
CQPCorpus corpus = (CQPCorpus)s; |
|
94 |
if (s instanceof MainCorpus) {
|
|
95 |
CQPCorpus corpus = (CQPCorpus) s;
|
|
87 | 96 |
MainCorpus mainCorpus = corpus.getMainCorpus(); |
88 | 97 |
|
89 | 98 |
File tigerCorpusDirectory = null; |
90 | 99 |
DirectoryDialog dialog = new DirectoryDialog(HandlerUtil.getActiveShell(event), SWT.OPEN); |
91 | 100 |
String path = dialog.open(); |
92 | 101 |
if (path == null) { |
102 |
Log.warning("Aborting annotation importation."); |
|
93 | 103 |
return null; |
94 |
} else { |
|
104 |
} |
|
105 |
else { |
|
95 | 106 |
tigerCorpusDirectory = new File(path); |
96 | 107 |
} |
97 | 108 |
|
98 |
File tigerDirectory = new File(corpus.getProjectDirectory(), "tiger");
|
|
109 |
File tigerDirectory = new File(mainCorpus.getProjectDirectory(), "tiger");
|
|
99 | 110 |
File tigerCorpusExistingDirectory = new File(tigerDirectory, tigerCorpusDirectory.getName()); |
100 | 111 |
if (tigerCorpusExistingDirectory.exists()) { |
101 | 112 |
boolean doIt = MessageDialog.openConfirm(HandlerUtil.getActiveShell(event), "Replace existing annotations", "TIGERSearch annotations already exists, replace them ?"); |
102 | 113 |
if (!doIt) { |
103 |
Log.warning("Aborting annotations import.");
|
|
114 |
Log.warning("Aborting annotation importation.");
|
|
104 | 115 |
return null; |
105 | 116 |
} |
106 | 117 |
} |
118 |
|
|
119 |
if (new File(tigerCorpusDirectory, "word.lexicon").exists() && new File(tigerCorpusDirectory, "corpus_config.xml").exists()) { |
|
120 |
// ok this is a TIGERSearch binary corpus |
|
121 |
} |
|
122 |
else { |
|
123 |
|
|
124 |
// need to build a TIGERSearch binary corpus |
|
125 |
File tigerBinaryCorpusDirectory = new File(tigerCorpusDirectory, "tiger"); |
|
126 |
if (!buildTIGERCorpus(mainCorpus, tigerCorpusDirectory, tigerBinaryCorpusDirectory)) { |
|
127 |
Log.warning("Aborting annotation importation."); |
|
128 |
return null; |
|
129 |
} |
|
130 |
tigerCorpusDirectory = new File(tigerBinaryCorpusDirectory, corpus.getName()); |
|
131 |
} |
|
132 |
|
|
107 | 133 |
try { |
108 |
return importAnnotations(mainCorpus, tigerCorpusDirectory); |
|
109 |
} catch (Exception e) {
|
|
110 |
// TODO Auto-generated catch block
|
|
134 |
return importAnnotations(mainCorpus, tigerCorpusDirectory, "editionId");
|
|
135 |
} |
|
136 |
catch (Exception e) {
|
|
111 | 137 |
e.printStackTrace(); |
112 | 138 |
return null; |
113 | 139 |
} |
114 |
} else { |
|
140 |
} |
|
141 |
else { |
|
115 | 142 |
Log.warning("Selection is not a corpus. Aborting."); |
116 | 143 |
return null; |
117 | 144 |
} |
118 | 145 |
} |
119 |
|
|
146 |
|
|
147 |
private boolean buildTIGERCorpus(MainCorpus corpus, File sourceDirectory, File tigerDir) { |
|
148 |
tigerDir.mkdirs(); |
|
149 |
|
|
150 |
File configfile = new File(tigerDir, "tigersearch.logprop"); |
|
151 |
if (!configfile.exists()) { |
|
152 |
TSCorpus.createLogPropFile(tigerDir); |
|
153 |
} |
|
154 |
|
|
155 |
BasicConfigurator.configure(); |
|
156 |
File master = new File(sourceDirectory, "main.xml"); |
|
157 |
if (!master.exists()) master = new File(sourceDirectory, "master.xml"); |
|
158 |
|
|
159 |
if (!master.exists()) { |
|
160 |
Log.warning("Error: Can't create TIGERSearch corpus: no main or master file found in " + sourceDirectory); |
|
161 |
return false; |
|
162 |
} |
|
163 |
String uri = master.getAbsolutePath(); // TIGER corpus source root file |
|
164 |
File tigerBinDir = new File(tigerDir, corpus.getName()); |
|
165 |
tigerBinDir.mkdirs(); |
|
166 |
try { |
|
167 |
IndexBuilderErrorHandler handler = new SimpleErrorHandler(tigerBinDir.getAbsolutePath()) { |
|
168 |
|
|
169 |
@Override |
|
170 |
public void setMessage(String message) {} |
|
171 |
|
|
172 |
@Override |
|
173 |
public void setNumberOfSentences(int number) {} |
|
174 |
|
|
175 |
@Override |
|
176 |
public void setProgressBar(int value) {} |
|
177 |
}; |
|
178 |
|
|
179 |
XMLIndexing indexing = new XMLIndexing(corpus.getName(), uri, tigerBinDir.getAbsolutePath(), handler, false); |
|
180 |
|
|
181 |
indexing.startIndexing(); |
|
182 |
|
|
183 |
File logs = new File(tigerBinDir, "indexing.log"); |
|
184 |
|
|
185 |
String txt = IOUtils.getText(logs); |
|
186 |
if (txt.contains("Error in corpus graph ")) { |
|
187 |
Log.warning("Error while importing TIGER corpus: " + txt); |
|
188 |
return false; |
|
189 |
} |
|
190 |
} |
|
191 |
catch (Exception e) { |
|
192 |
System.out.println(e.getMessage()); |
|
193 |
return false; |
|
194 |
} |
|
195 |
return true; |
|
196 |
} |
|
197 |
|
|
120 | 198 |
/** |
121 | 199 |
* |
122 | 200 |
* if a TIGER corpus with the same name already exists, it is replaced |
... | ... | |
124 | 202 |
* @param corpus |
125 | 203 |
* @param tigerCorpusDirectory |
126 | 204 |
* @return the number of imported annotations |
127 |
* @throws IndexException
|
|
128 |
* @throws QueryIndexException
|
|
129 |
* @throws CqiClientException
|
|
130 |
* @throws CqiServerError
|
|
131 |
* @throws IOException
|
|
132 |
* @throws UnexpectedAnswerException
|
|
205 |
* @throws IndexException |
|
206 |
* @throws QueryIndexException |
|
207 |
* @throws CqiClientException |
|
208 |
* @throws CqiServerError |
|
209 |
* @throws IOException |
|
210 |
* @throws UnexpectedAnswerException |
|
133 | 211 |
*/ |
134 |
public static int importAnnotations(MainCorpus corpus, File tigerCorpusDirectory) throws IndexException, QueryIndexException, UnexpectedAnswerException, IOException, CqiServerError, CqiClientException { |
|
212 |
public static int importAnnotations(MainCorpus corpus, File tigerCorpusDirectory, String wordIdAttribute) throws IndexException, QueryIndexException, UnexpectedAnswerException, IOException, |
|
213 |
CqiServerError, |
|
214 |
CqiClientException { |
|
135 | 215 |
|
136 | 216 |
// TXM corpus files |
137 | 217 |
File tigerDirectory = new File(corpus.getProjectDirectory(), "tiger"); |
138 |
File tigerCorpusExistingDirectory = new File(tigerDirectory, tigerCorpusDirectory.getName());
|
|
218 |
File tigerCorpusExistingDirectory = new File(tigerDirectory, corpus.getName());
|
|
139 | 219 |
DeleteDir.deleteDirectory(tigerCorpusExistingDirectory); |
140 | 220 |
tigerCorpusExistingDirectory.mkdirs(); |
141 | 221 |
|
... | ... | |
147 | 227 |
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient(); |
148 | 228 |
|
149 | 229 |
TSCorpusManager manager = new TSCorpusManager(tigerCorpusDirectory.getParentFile(), configfile); |
150 |
|
|
230 |
|
|
151 | 231 |
TSCorpus tcorpus = manager.getCorpus(tigerCorpusDirectory.getName()); |
152 | 232 |
InternalCorpusQueryManagerLocal2 tigermanager = tcorpus.manager; |
153 | 233 |
CorpusQueryProcessor processor = tigermanager.getQueryProcessor(); |
154 | 234 |
|
155 | 235 |
Index index = processor.getIndex(); |
156 | 236 |
int size = 0; |
157 |
for (int nr = 0 ; nr < index.getNumberOfGraphs() ; nr++) {
|
|
237 |
for (int nr = 0; nr < index.getNumberOfGraphs(); nr++) {
|
|
158 | 238 |
size += index.getNumberOfTNodes(nr); |
159 | 239 |
} |
160 | 240 |
|
161 | 241 |
if (size == 0) { |
162 |
Log.warning("No word found in the TIGERSearch corpus: "+tigerCorpusDirectory+". Aborting.");
|
|
242 |
Log.warning("No word found in the TIGERSearch corpus: " + tigerCorpusDirectory + ". Aborting.");
|
|
163 | 243 |
return 0; |
164 | 244 |
} |
165 | 245 |
|
166 |
Log.info("Importing "+size+" word annotations...");
|
|
246 |
Log.info("Importing " + size + " word annotations...");
|
|
167 | 247 |
|
168 | 248 |
// compute start position of sentences |
169 | 249 |
int[] starts = new int[index.getNumberOfGraphs()]; |
170 |
for (int i = 0 ; i < index.getNumberOfGraphs() ; i++) {
|
|
250 |
for (int i = 0; i < index.getNumberOfGraphs(); i++) {
|
|
171 | 251 |
starts[i] = 0; |
172 | 252 |
if (i > 0) { |
173 |
starts[i] += index.getNumberOfTNodes(i-1) + starts[i-1];
|
|
253 |
starts[i] += index.getNumberOfTNodes(i - 1) + starts[i - 1];
|
|
174 | 254 |
} |
175 | 255 |
} |
176 | 256 |
|
177 | 257 |
File offsetsFile = new File(tigerCorpusExistingDirectory, "offsets.data"); |
178 | 258 |
RandomAccessFile offsetsRAFile = new RandomAccessFile(offsetsFile, "rw"); |
179 | 259 |
FileChannel offsetsFileChannel = offsetsRAFile.getChannel(); |
180 |
MappedByteBuffer offsetsMapped = offsetsFileChannel.map(FileChannel.MapMode.READ_WRITE, 0, size*Integer.BYTES);
|
|
181 |
//out.putInt(positions[i]) |
|
182 |
|
|
260 |
MappedByteBuffer offsetsMapped = offsetsFileChannel.map(FileChannel.MapMode.READ_WRITE, 0, size * Integer.BYTES);
|
|
261 |
// out.putInt(positions[i])
|
|
262 |
|
|
183 | 263 |
File presencesFile = new File(tigerCorpusExistingDirectory, "presences.data"); |
184 | 264 |
RandomAccessFile presencesRAFile = new RandomAccessFile(presencesFile, "rw"); |
185 | 265 |
FileChannel presencesFileChannel = presencesRAFile.getChannel(); |
186 | 266 |
MappedByteBuffer presencesMapped = presencesFileChannel.map(FileChannel.MapMode.READ_WRITE, 0, size); |
187 |
|
|
188 |
int numberOfWordsAnntoated = 0; |
|
189 | 267 |
|
268 |
int numberOfWordsAnnotated = 0; |
|
269 |
|
|
190 | 270 |
// for each sentence |
191 |
for (int nr = 0 ; nr < index.getNumberOfGraphs() ; nr++) { |
|
271 |
ConsoleProgressBar cpb = new ConsoleProgressBar(index.getNumberOfGraphs()); |
|
272 |
for (int nr = 0; nr < index.getNumberOfGraphs(); nr++) { |
|
273 |
cpb.tick(); |
|
192 | 274 |
int sent_size = index.getNumberOfTNodes(nr); |
193 | 275 |
Sentence sent = tcorpus.manager.getSentence(nr); |
194 | 276 |
|
195 | 277 |
String[] ids = new String[sent_size]; |
196 | 278 |
int[] tigerPositions = new int[sent_size]; |
197 |
for (int t = 0 ; t < sent_size ; t++) { |
|
198 |
T_Node terminal = (T_Node)sent.getTerminalAt(t); |
|
199 |
ids[t] = terminal.getFeature("editionId"); |
|
279 |
for (int t = 0; t < sent_size; t++) { |
|
280 |
T_Node terminal = (T_Node) sent.getTerminalAt(t); |
|
281 |
ids[t] = terminal.getFeature(wordIdAttribute); |
Formats disponibles : Unified diff