Révision 687
| tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/handlers/ReferencerToConc.java (revision 687) | ||
|---|---|---|
| 40 | 40 |
import org.eclipse.ui.handlers.HandlerUtil; |
| 41 | 41 |
import org.txm.concordance.core.functions.Concordance; |
| 42 | 42 |
import org.txm.concordance.rcp.editors.ConcordanceEditor; |
| 43 |
import org.txm.rcp.RCPMessages; |
|
| 44 | 43 |
import org.txm.rcp.editors.TXMResultEditorInput; |
| 44 |
import org.txm.rcp.messages.TXMUIMessages; |
|
| 45 | 45 |
import org.txm.referencer.core.functions.Referencer; |
| 46 | 46 |
import org.txm.referencer.core.functions.Referencer.Line; |
| 47 | 47 |
import org.txm.referencer.rcp.editors.ReferencerEditor; |
| tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/preferences/ReferencerPreferencePage.java (revision 687) | ||
|---|---|---|
| 28 | 28 |
package org.txm.referencer.rcp.preferences; |
| 29 | 29 |
|
| 30 | 30 |
import org.eclipse.jface.preference.BooleanFieldEditor; |
| 31 |
import org.txm.rcp.RCPMessages; |
|
| 32 | 31 |
import org.txm.rcp.preferences.RCPPreferences; |
| 33 | 32 |
import org.txm.rcp.preferences.RCPPreferencesPage; |
| 33 |
import org.txm.referencer.rcp.messages.ReferencerUIMessages; |
|
| 34 | 34 |
|
| 35 | 35 |
/** |
| 36 | 36 |
* This class represents a preference page that is contributed to the |
| ... | ... | |
| 53 | 53 |
*/ |
| 54 | 54 |
public ReferencerPreferencePage() {
|
| 55 | 55 |
super(); |
| 56 |
setTitle(RCPMessages.ReferencerPreferencePage_2);
|
|
| 56 |
setTitle(ReferencerUIMessages.ReferencerPreferencePage_2);
|
|
| 57 | 57 |
} |
| 58 | 58 |
|
| 59 | 59 |
/** |
| ... | ... | |
| 64 | 64 |
@Override |
| 65 | 65 |
public void createFieldEditors() {
|
| 66 | 66 |
|
| 67 |
referencer_sorttype = new BooleanFieldEditor(RCPPreferences.SORTBYFREQ, |
|
| 68 |
RCPMessages.ReferencerPreferencePage_3, getFieldEditorParent()); |
|
| 67 |
referencer_sorttype = new BooleanFieldEditor(RCPPreferences.SORTBYFREQ, ReferencerUIMessages.ReferencerPreferencePage_3, getFieldEditorParent()); |
|
| 69 | 68 |
|
| 70 | 69 |
addField(referencer_sorttype); |
| 71 | 70 |
|
| tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/messages/ReferencerUIMessages.java (revision 687) | ||
|---|---|---|
| 1 |
|
|
| 2 |
package org.txm.referencer.rcp.messages; |
|
| 3 |
|
|
| 4 |
import org.eclipse.osgi.util.NLS; |
|
| 5 |
import org.txm.utils.messages.Utf8NLS; |
|
| 6 |
|
|
| 7 |
public class ReferencerUIMessages extends NLS {
|
|
| 8 |
|
|
| 9 |
private static final String BUNDLE_NAME = "org.txm.referencer.rcp.messages.messages"; //$NON-NLS-1$ |
|
| 10 |
|
|
| 11 |
|
|
| 12 |
public static String ReferencerEditor_0; |
|
| 13 |
public static String ReferencerEditor_10; |
|
| 14 |
public static String ReferencerEditor_11; |
|
| 15 |
public static String ReferencerEditor_12; |
|
| 16 |
public static String ReferencerEditor_13; |
|
| 17 |
public static String ReferencerEditor_17; |
|
| 18 |
public static String ReferencerEditor_19; |
|
| 19 |
public static String ReferencerEditor_21; |
|
| 20 |
public static String ReferencerEditor_23; |
|
| 21 |
public static String ReferencerEditor_25; |
|
| 22 |
public static String ReferencerEditor_5; |
|
| 23 |
public static String ReferencerEditor_6; |
|
| 24 |
public static String ReferencerEditor_7; |
|
| 25 |
public static String ReferencerEditor_8; |
|
| 26 |
public static String ReferencerPreferencePage_2; |
|
| 27 |
public static String ReferencerPreferencePage_3; |
|
| 28 |
|
|
| 29 |
|
|
| 30 |
static {
|
|
| 31 |
// initialize resource bundle |
|
| 32 |
Utf8NLS.initializeMessages(BUNDLE_NAME, ReferencerUIMessages.class); |
|
| 33 |
} |
|
| 34 |
|
|
| 35 |
private ReferencerUIMessages() {
|
|
| 36 |
} |
|
| 37 |
} |
|
| 0 | 38 | |
| tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/messages/messages_ru.properties (revision 687) | ||
|---|---|---|
| 1 |
|
|
| 2 |
ReferencerEditor_0 = Расчет ссылок <{0}> в {1} со свойством {2} по шаблону {3}
|
|
| 3 |
ReferencerEditor_10 = Получение индексов |
|
| 4 |
ReferencerEditor_11 = Ссылки |
|
| 5 |
ReferencerEditor_12 = Группировка позиций по индексам |
|
| 6 |
ReferencerEditor_13 = Обновление интерфейса |
|
| 7 |
ReferencerEditor_17 = Показ первой страницы результатов |
|
| 8 |
ReferencerEditor_19 = Показ последней страницы результатов |
|
| 9 |
ReferencerEditor_21 = Показ следующей страницы результатов |
|
| 10 |
ReferencerEditor_23 = Показ предыдущей страницы результатов |
|
| 11 |
ReferencerEditor_25 = Открытие конкорданса |
|
| 12 |
ReferencerEditor_5 = Расчет ссылок {0}
|
|
| 13 |
ReferencerEditor_6 = Шаблон ссылок |
|
| 14 |
ReferencerEditor_7 = Ни одного результата |
|
| 15 |
ReferencerEditor_8 = Получение употреблений |
|
| 16 |
|
|
| 17 |
ReferencerPreferencePage_2 = Ссылки |
|
| 18 |
ReferencerPreferencePage_3 = Сортировать ссылки по частотности |
|
| 0 | 19 | |
| tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/messages/messages_fr.properties (revision 687) | ||
|---|---|---|
| 1 |
|
|
| 2 |
ReferencerEditor_0 = Calcul des références de <{0}> dans {1} avec la propriété {2} et le patron {3}
|
|
| 3 |
ReferencerEditor_10 = Récupération des index |
|
| 4 |
ReferencerEditor_11 = Références |
|
| 5 |
ReferencerEditor_12 = Regroupement des positions par index |
|
| 6 |
ReferencerEditor_13 = Mise à jour de l'interface |
|
| 7 |
ReferencerEditor_17 = Affichage de la première page de résultats |
|
| 8 |
ReferencerEditor_19 = Affichage de la dernière page de résultats |
|
| 9 |
ReferencerEditor_21 = Affichage de la page de résultats suivante |
|
| 10 |
ReferencerEditor_23 = Affichage de la page de résultats précédente |
|
| 11 |
ReferencerEditor_25 = Ouverture de la concordance |
|
| 12 |
ReferencerEditor_5 = Calcul des références de {0}
|
|
| 13 |
ReferencerEditor_6 = Patron des références |
|
| 14 |
ReferencerEditor_7 = Aucun résultat |
|
| 15 |
ReferencerEditor_8 = Récupération des occurrences |
|
| 16 |
|
|
| 17 |
ReferencerPreferencePage_2 = Références |
|
| 18 |
ReferencerPreferencePage_3 = Ordonner les références par fréquence |
|
| 0 | 19 | |
| tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/editors/ReferencerEditor.java (revision 687) | ||
|---|---|---|
| 31 | 31 |
import java.util.Collections; |
| 32 | 32 |
import java.util.List; |
| 33 | 33 |
|
| 34 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
| 35 |
import org.eclipse.jface.action.MenuManager; |
|
| 36 | 34 |
import org.eclipse.jface.viewers.TableViewer; |
| 37 | 35 |
import org.eclipse.osgi.util.NLS; |
| 38 | 36 |
import org.eclipse.swt.SWT; |
| ... | ... | |
| 50 | 48 |
import org.eclipse.swt.widgets.Button; |
| 51 | 49 |
import org.eclipse.swt.widgets.Composite; |
| 52 | 50 |
import org.eclipse.swt.widgets.Display; |
| 53 |
import org.eclipse.swt.widgets.Menu; |
|
| 54 | 51 |
import org.eclipse.swt.widgets.TableColumn; |
| 55 | 52 |
import org.eclipse.ui.IEditorInput; |
| 56 | 53 |
import org.eclipse.ui.IEditorSite; |
| 57 | 54 |
import org.eclipse.ui.PartInitException; |
| 55 |
import org.txm.core.messages.TXMCoreMessages; |
|
| 58 | 56 |
import org.txm.core.preferences.TXMPreferences; |
| 59 | 57 |
import org.txm.core.results.TXMResult; |
| 60 |
import org.txm.rcp.RCPMessages; |
|
| 61 | 58 |
import org.txm.rcp.StatusLine; |
| 62 | 59 |
import org.txm.rcp.editors.TXMEditor; |
| 63 | 60 |
import org.txm.rcp.editors.TXMResultEditorInput; |
| 64 | 61 |
import org.txm.rcp.editors.TableKeyListener; |
| 62 |
import org.txm.rcp.messages.TXMUIMessages; |
|
| 65 | 63 |
import org.txm.rcp.preferences.RCPPreferences; |
| 66 | 64 |
import org.txm.rcp.swt.widget.NavigationWidget; |
| 67 | 65 |
import org.txm.rcp.swt.widget.PropertiesSelector; |
| ... | ... | |
| 71 | 69 |
import org.txm.referencer.core.functions.Referencer; |
| 72 | 70 |
import org.txm.referencer.core.functions.Referencer.Line; |
| 73 | 71 |
import org.txm.referencer.rcp.handlers.ReferencerToConc; |
| 72 |
import org.txm.referencer.rcp.messages.ReferencerUIMessages; |
|
| 74 | 73 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
| 75 | 74 |
import org.txm.searchengine.cqp.corpus.Corpus; |
| 76 | 75 |
import org.txm.searchengine.cqp.corpus.Property; |
| ... | ... | |
| 185 | 184 |
//setPartName(referencer.getName()); //$NON-NLS-1$ |
| 186 | 185 |
|
| 187 | 186 |
viewer.getTable().setFocus(); |
| 188 |
System.out.println(NLS.bind(RCPMessages.CreatePartition_5, referencer.getNLines(), referencer.getV()));
|
|
| 187 |
System.out.println(NLS.bind(TXMUIMessages.CreatePartition_5, referencer.getNLines(), referencer.getV()));
|
|
| 189 | 188 |
// } |
| 190 | 189 |
// }); |
| 191 | 190 |
// |
| ... | ... | |
| 251 | 250 |
|
| 252 | 251 |
// [Search] |
| 253 | 252 |
Button go = new Button(paramArea, SWT.PUSH); |
| 254 |
go.setText(RCPMessages.SEARCH);
|
|
| 253 |
go.setText(TXMUIMessages.SEARCH);
|
|
| 255 | 254 |
go.setLayoutData(new GridData(GridData.FILL, GridData.FILL, |
| 256 | 255 |
false, false)); |
| 257 | 256 |
Font f = go.getFont(); |
| ... | ... | |
| 276 | 275 |
patternArea = new PropertiesSelector<StructuralUnitProperty>(paramArea, SWT.NONE); |
| 277 | 276 |
patternArea.setLayoutData(new GridData(GridData.FILL, GridData.FILL, false, false,3,1)); |
| 278 | 277 |
patternArea.setLayout(new GridLayout(3, false)); |
| 279 |
patternArea.setText(RCPMessages.ReferencerEditor_6);
|
|
| 278 |
patternArea.setText(ReferencerUIMessages.ReferencerEditor_6);
|
|
| 280 | 279 |
patternArea.addSelectionListener(new SelectionListener() {
|
| 281 | 280 |
@Override |
| 282 | 281 |
public void widgetSelected(SelectionEvent e) {
|
| ... | ... | |
| 299 | 298 |
|
| 300 | 299 |
@Override |
| 301 | 300 |
public void widgetSelected(SelectionEvent e) {
|
| 302 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_17);
|
|
| 301 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_17);
|
|
| 303 | 302 |
fillDisplayArea(0, lineperpage); |
| 304 | 303 |
viewer.getTable().select(0); |
| 305 | 304 |
viewer.getTable().showSelection(); |
| ... | ... | |
| 313 | 312 |
|
| 314 | 313 |
@Override |
| 315 | 314 |
public void widgetSelected(SelectionEvent e) {
|
| 316 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_19);
|
|
| 315 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_19);
|
|
| 317 | 316 |
fillDisplayArea(referencer.getNLines() |
| 318 | 317 |
- lineperpage, referencer.getNLines()); |
| 319 | 318 |
viewer.getTable().select(0); |
| ... | ... | |
| 328 | 327 |
|
| 329 | 328 |
@Override |
| 330 | 329 |
public void widgetSelected(SelectionEvent e) {
|
| 331 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_21);
|
|
| 330 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_21);
|
|
| 332 | 331 |
fillDisplayArea(topLine + lineperpage, |
| 333 | 332 |
bottomLine + lineperpage); |
| 334 | 333 |
viewer.getTable().select(0); |
| ... | ... | |
| 343 | 342 |
|
| 344 | 343 |
@Override |
| 345 | 344 |
public void widgetSelected(SelectionEvent e) {
|
| 346 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_23);
|
|
| 345 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_23);
|
|
| 347 | 346 |
fillDisplayArea(topLine - lineperpage, |
| 348 | 347 |
bottomLine - lineperpage); |
| 349 | 348 |
viewer.getTable().select(0); |
| ... | ... | |
| 384 | 383 |
.getSelection(), e.y); |
| 385 | 384 |
int col = getPointedColumn(mouseposition); |
| 386 | 385 |
if (col == 1) {
|
| 387 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_25);
|
|
| 386 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_25);
|
|
| 388 | 387 |
sendSelectionToConc(); |
| 389 | 388 |
} else if (col == 2) {
|
| 390 | 389 |
//System.out.println("Send to edition ?"); //$NON-NLS-1$
|
| ... | ... | |
| 397 | 396 |
nColumn.pack(); |
| 398 | 397 |
|
| 399 | 398 |
unitColumn = new TableColumn(viewer.getTable(), SWT.LEFT); |
| 400 |
unitColumn.setText(RCPMessages.ReferencerEditor_9);
|
|
| 401 |
unitColumn.setToolTipText(RCPMessages.ReferencerEditor_9);
|
|
| 399 |
unitColumn.setText(TXMCoreMessages.common_units);
|
|
| 400 |
unitColumn.setToolTipText(TXMCoreMessages.common_units);
|
|
| 402 | 401 |
unitColumn.setWidth(200); |
| 403 | 402 |
unitColumn.addSelectionListener(new SelectionListener() {
|
| 404 | 403 |
@Override |
| ... | ... | |
| 412 | 411 |
}); |
| 413 | 412 |
|
| 414 | 413 |
freqColumn = new TableColumn(viewer.getTable(), SWT.LEFT); |
| 415 |
freqColumn.setText(RCPMessages.ReferencerEditor_11);
|
|
| 416 |
freqColumn.setToolTipText(RCPMessages.ReferencerEditor_11);
|
|
| 414 |
freqColumn.setText(ReferencerUIMessages.ReferencerEditor_11);
|
|
| 415 |
freqColumn.setToolTipText(ReferencerUIMessages.ReferencerEditor_11);
|
|
| 417 | 416 |
freqColumn.setWidth(100); |
| 418 | 417 |
freqColumn.addSelectionListener(new SelectionListener() {
|
| 419 | 418 |
@Override |
| tmp/org.txm.querycooccurrences.rcp/src/org/txm/rcp/commands/function/ComputeQueryCooccurrence.java (revision 687) | ||
|---|---|---|
| 57 | 57 |
import org.eclipse.swt.widgets.Shell; |
| 58 | 58 |
import org.eclipse.swt.widgets.Spinner; |
| 59 | 59 |
import org.eclipse.ui.handlers.HandlerUtil; |
| 60 |
import org.txm.core.messages.TXMCoreMessages; |
|
| 60 | 61 |
import org.txm.functions.coocmatrix.QueryCooccurrence; |
| 61 | 62 |
//import org.txm.functions.queryindex.*; |
| 62 | 63 |
import org.txm.rcp.JobsTimer; |
| 63 |
import org.txm.rcp.RCPMessages;
|
|
| 64 |
import org.txm.rcp.messages.TXMUIMessages;
|
|
| 64 | 65 |
import org.txm.rcp.utils.JobHandler; |
| 65 | 66 |
import org.txm.rcp.views.QueriesView; |
| 66 | 67 |
import org.txm.rcp.views.corpora.CorporaView; |
| ... | ... | |
| 100 | 101 |
JobsTimer.start(); |
| 101 | 102 |
|
| 102 | 103 |
if (!(element instanceof Corpus)) {
|
| 103 |
System.out.println(RCPMessages.ComputeCooccurrences_1+ element);
|
|
| 104 |
System.out.println(TXMUIMessages.ComputeCooccurrences_1+ element);
|
|
| 104 | 105 |
return Status.CANCEL_STATUS; |
| 105 | 106 |
} |
| 106 | 107 |
Corpus corpus = ((Corpus)element); |
| ... | ... | |
| 121 | 122 |
|
| 122 | 123 |
if (cooc == null) return null; |
| 123 | 124 |
|
| 124 |
System.out.println(RCPMessages.ComputeCoocMatrix_0+cooc.getSymbol());
|
|
| 125 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_0+cooc.getSymbol());
|
|
| 125 | 126 |
|
| 126 | 127 |
syncExec(new Runnable() {
|
| 127 | 128 |
@Override |
| ... | ... | |
| 136 | 137 |
return Status.CANCEL_STATUS; |
| 137 | 138 |
} catch (Exception e) {
|
| 138 | 139 |
Log.printStackTrace(e); |
| 139 |
try { System.out.println(RCPMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
| 140 |
try { System.out.println(TXMUIMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
| 140 | 141 |
} catch (Exception e1) { e1.printStackTrace(); }
|
| 141 | 142 |
} finally {
|
| 142 | 143 |
monitor.done(); |
| ... | ... | |
| 162 | 163 |
getQueriesAndNames(dialog.getQueryFile1(), queries1, names1); |
| 163 | 164 |
getQueriesAndNames(dialog.getQueryFile2(), queries2, names2); |
| 164 | 165 |
|
| 165 |
System.out.println(RCPMessages.ProgressionDialog_8+queries1);
|
|
| 166 |
System.out.println(RCPMessages.MergeCols_5+names1);
|
|
| 166 |
System.out.println(TXMCoreMessages.common_queries + queries1);
|
|
| 167 |
System.out.println(TXMUIMessages.MergeCols_5+names1);
|
|
| 167 | 168 |
|
| 168 | 169 |
QueryCooccurrence cooc = null; |
| 169 | 170 |
try {
|
| ... | ... | |
| 171 | 172 |
System.out.println("Number of queries: "+nqueries);
|
| 172 | 173 |
System.out.println("Estimated time: "+(nqueries*reqtime/1000)+" secs");
|
| 173 | 174 |
cooc = new QueryCooccurrence(corpus, queries1, names1, queries2, names2, dist, min, struct); |
| 174 |
System.out.println(RCPMessages.ComputeCoocMatrix_1+cooc.getNNodes());
|
|
| 175 |
System.out.println(RCPMessages.ComputeCoocMatrix_2+cooc.getNEdges());
|
|
| 176 |
System.out.println(RCPMessages.ComputeCoocMatrix_3+dist);
|
|
| 175 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_1+cooc.getNNodes());
|
|
| 176 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_2+cooc.getNEdges());
|
|
| 177 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_3+dist);
|
|
| 177 | 178 |
} catch (Exception e) {
|
| 178 | 179 |
// TODO Auto-generated catch block |
| 179 | 180 |
Log.printStackTrace(e); |
| 180 |
try { System.out.println(RCPMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
| 181 |
try { System.out.println(TXMUIMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
| 181 | 182 |
} catch (Exception e1) { e1.printStackTrace(); }
|
| 182 | 183 |
} |
| 183 | 184 |
return cooc; |
| ... | ... | |
| 239 | 240 |
@Override |
| 240 | 241 |
protected void configureShell(Shell newShell) {
|
| 241 | 242 |
super.configureShell(newShell); |
| 242 |
newShell.setText(RCPMessages.ComputeCoocMatrix_7);
|
|
| 243 |
newShell.setText(TXMUIMessages.ComputeCoocMatrix_7);
|
|
| 243 | 244 |
} |
| 244 | 245 |
|
| 245 | 246 |
/* (non-Javadoc) |
| ... | ... | |
| 254 | 255 |
composite.setLayout(layout); |
| 255 | 256 |
|
| 256 | 257 |
Label anaPropLabel = new Label(composite, SWT.NONE); |
| 257 |
anaPropLabel.setText(RCPMessages.ParaBrowserEditor_5);
|
|
| 258 |
anaPropLabel.setText(TXMCoreMessages.common_structure);
|
|
| 258 | 259 |
anaPropLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, false, false)); |
| 259 | 260 |
|
| 260 | 261 |
propsCombo = new org.eclipse.swt.widgets.List(composite, SWT.READ_ONLY); |
| ... | ... | |
| 277 | 278 |
} |
| 278 | 279 |
|
| 279 | 280 |
Label distLabel = new Label(composite, SWT.NONE); |
| 280 |
distLabel.setText(RCPMessages.ComputeCoocMatrix_10);
|
|
| 281 |
distLabel.setText(TXMUIMessages.ComputeCoocMatrix_10);
|
|
| 281 | 282 |
distLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
| 282 | 283 |
false, false)); |
| 283 | 284 |
distSpinner = new Spinner(composite, SWT.BORDER); |
| ... | ... | |
| 286 | 287 |
distSpinner.setSelection(10); |
| 287 | 288 |
|
| 288 | 289 |
Label minLabel = new Label(composite, SWT.NONE); |
| 289 |
minLabel.setText(RCPMessages.ComputeCoocMatrix_11);
|
|
| 290 |
minLabel.setText(TXMUIMessages.ComputeCoocMatrix_11);
|
|
| 290 | 291 |
minLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
| 291 | 292 |
false, false)); |
| 292 | 293 |
minSpinner = new Spinner(composite, SWT.BORDER); |
| tmp/org.txm.querycooccurrences.rcp/src/org/txm/rcp/commands/function/ComputeQueryAutoCooccurrence.java (revision 687) | ||
|---|---|---|
| 53 | 53 |
import org.eclipse.swt.widgets.Shell; |
| 54 | 54 |
import org.eclipse.swt.widgets.Spinner; |
| 55 | 55 |
import org.eclipse.ui.handlers.HandlerUtil; |
| 56 |
import org.txm.core.messages.TXMCoreMessages; |
|
| 56 | 57 |
import org.txm.functions.coocmatrix.QueryAutoCooccurrence; |
| 57 | 58 |
import org.txm.functions.queryindex.QueryIndex; |
| 58 | 59 |
import org.txm.functions.queryindex.QueryIndexLine; |
| 59 | 60 |
import org.txm.rcp.JobsTimer; |
| 60 |
import org.txm.rcp.RCPMessages;
|
|
| 61 |
import org.txm.rcp.messages.TXMUIMessages;
|
|
| 61 | 62 |
import org.txm.rcp.utils.JobHandler; |
| 62 | 63 |
import org.txm.rcp.views.QueriesView; |
| 63 | 64 |
import org.txm.rcp.views.corpora.CorporaView; |
| ... | ... | |
| 98 | 99 |
JobsTimer.start(); |
| 99 | 100 |
|
| 100 | 101 |
if (!(element instanceof QueryIndex)) {
|
| 101 |
System.out.println(RCPMessages.ComputeCooccurrences_1+ element);
|
|
| 102 |
System.out.println(TXMUIMessages.ComputeCooccurrences_1+ element);
|
|
| 102 | 103 |
return Status.CANCEL_STATUS; |
| 103 | 104 |
} |
| 104 | 105 |
QueryIndex qi = ((QueryIndex)element); |
| ... | ... | |
| 120 | 121 |
|
| 121 | 122 |
if (cooc == null) return null; |
| 122 | 123 |
|
| 123 |
System.out.println(RCPMessages.ComputeCoocMatrix_0+cooc.getSymbol());
|
|
| 124 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_0+cooc.getSymbol());
|
|
| 124 | 125 |
|
| 125 | 126 |
syncExec(new Runnable() {
|
| 126 | 127 |
@Override |
| ... | ... | |
| 135 | 136 |
return Status.CANCEL_STATUS; |
| 136 | 137 |
} catch (Exception e) {
|
| 137 | 138 |
Log.printStackTrace(e); |
| 138 |
try { System.out.println(RCPMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
| 139 |
try { System.out.println(TXMUIMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
| 139 | 140 |
} catch (Exception e1) { e1.printStackTrace(); }
|
| 140 | 141 |
} finally {
|
| 141 | 142 |
monitor.done(); |
| ... | ... | |
| 164 | 165 |
names.add(line.getName()); |
| 165 | 166 |
} |
| 166 | 167 |
|
| 167 |
System.out.println(RCPMessages.ProgressionDialog_8+queries);
|
|
| 168 |
System.out.println(RCPMessages.MergeCols_5+names);
|
|
| 168 |
System.out.println(TXMCoreMessages.common_queries+queries);
|
|
| 169 |
System.out.println(TXMUIMessages.MergeCols_5+names);
|
|
| 169 | 170 |
|
| 170 | 171 |
QueryAutoCooccurrence cooc = null; |
| 171 | 172 |
try {
|
| ... | ... | |
| 175 | 176 |
System.out.println("Estimated time: "+(nqueries*reqtime/1000)+" secs");
|
| 176 | 177 |
cooc = new QueryAutoCooccurrence(corpus); |
| 177 | 178 |
cooc.setParameters(queries, names, dist, min, struct, oriented, new ConsoleProgressBar(nqueries)); |
| 178 |
System.out.println(RCPMessages.ComputeCoocMatrix_1+cooc.getNNodes());
|
|
| 179 |
System.out.println(RCPMessages.ComputeCoocMatrix_2+cooc.getNEdges());
|
|
| 180 |
System.out.println(RCPMessages.ComputeCoocMatrix_3+dist);
|
|
| 179 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_1+cooc.getNNodes());
|
|
| 180 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_2+cooc.getNEdges());
|
|
| 181 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_3+dist);
|
|
| 181 | 182 |
|
| 182 | 183 |
if (outfile != null) {
|
| 183 | 184 |
cooc.toGraphml(outfile); |
| 184 |
System.out.println(RCPMessages.ComputeCoocMatrix_4+outfile);
|
|
| 185 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_4+outfile);
|
|
| 185 | 186 |
} |
| 186 | 187 |
|
| 187 | 188 |
} catch (Exception e) {
|
| 188 | 189 |
Log.printStackTrace(e); |
| 189 |
try { System.out.println(RCPMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
| 190 |
try { System.out.println(TXMUIMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
| 190 | 191 |
} catch (Exception e1) { e1.printStackTrace(); }
|
| 191 | 192 |
} |
| 192 | 193 |
return cooc; |
| ... | ... | |
| 226 | 227 |
@Override |
| 227 | 228 |
protected void configureShell(Shell newShell) {
|
| 228 | 229 |
super.configureShell(newShell); |
| 229 |
newShell.setText(RCPMessages.ComputeCoocMatrix_7);
|
|
| 230 |
newShell.setText(TXMUIMessages.ComputeCoocMatrix_7);
|
|
| 230 | 231 |
} |
| 231 | 232 |
|
| 232 | 233 |
/* (non-Javadoc) |
| ... | ... | |
| 241 | 242 |
composite.setLayout(layout); |
| 242 | 243 |
|
| 243 | 244 |
Label anaPropLabel = new Label(composite, SWT.NONE); |
| 244 |
anaPropLabel.setText(RCPMessages.ParaBrowserEditor_5);
|
|
| 245 |
anaPropLabel.setText(TXMCoreMessages.common_structure);
|
|
| 245 | 246 |
anaPropLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, false, false)); |
| 246 | 247 |
|
| 247 | 248 |
propsCombo = new org.eclipse.swt.widgets.List(composite, SWT.READ_ONLY); |
| ... | ... | |
| 264 | 265 |
} |
| 265 | 266 |
|
| 266 | 267 |
Label distLabel = new Label(composite, SWT.NONE); |
| 267 |
distLabel.setText(RCPMessages.ComputeCoocMatrix_10);
|
|
| 268 |
distLabel.setText(TXMUIMessages.ComputeCoocMatrix_10);
|
|
| 268 | 269 |
distLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
| 269 | 270 |
false, false)); |
| 270 | 271 |
distSpinner = new Spinner(composite, SWT.BORDER); |
| ... | ... | |
| 273 | 274 |
distSpinner.setSelection(10); |
| 274 | 275 |
|
| 275 | 276 |
Label minLabel = new Label(composite, SWT.NONE); |
| 276 |
minLabel.setText(RCPMessages.ComputeCoocMatrix_11);
|
|
| 277 |
minLabel.setText(TXMUIMessages.ComputeCoocMatrix_11);
|
|
| 277 | 278 |
minLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
| 278 | 279 |
false, false)); |
| 279 | 280 |
minSpinner = new Spinner(composite, SWT.BORDER); |
| ... | ... | |
| 289 | 290 |
orientedButton.setSelection(false); |
| 290 | 291 |
|
| 291 | 292 |
outfileLabel = new Label(composite, SWT.NONE); |
| 292 |
outfileLabel.setText(RCPMessages.ComputeCoocMatrix_12);
|
|
| 293 |
outfileLabel.setText(TXMUIMessages.ComputeCoocMatrix_12);
|
|
| 293 | 294 |
outfileLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
| 294 | 295 |
false, false)); |
| 295 | 296 |
|
| 296 | 297 |
Button outfileButton = new Button(composite, SWT.PUSH); |
| 297 |
outfileButton.setText(RCPMessages.ComputeCoocMatrix_13);
|
|
| 298 |
outfileButton.setText(TXMUIMessages.ComputeCoocMatrix_13);
|
|
| 298 | 299 |
outfileButton.addSelectionListener(new SelectionListener() {
|
| 299 | 300 |
@Override |
| 300 | 301 |
public void widgetSelected(SelectionEvent e) {
|
| tmp/org.txm.treetagger.rcp/plugin.xml (revision 687) | ||
|---|---|---|
| 6 | 6 |
point="org.eclipse.ui.commands"> |
| 7 | 7 |
<command |
| 8 | 8 |
categoryId="org.txm.rcp.category.txm" |
| 9 |
defaultHandler="org.txm.treetagger.commands.Train"
|
|
| 9 |
defaultHandler="org.txm.treetagger.rcp.handlers.Train"
|
|
| 10 | 10 |
id="org.txm.treetagger.commands.Train" |
| 11 | 11 |
name="Train"> |
| 12 | 12 |
</command> |
| 13 | 13 |
<command |
| 14 | 14 |
categoryId="org.txm.rcp.category.txm" |
| 15 |
defaultHandler="org.txm.treetagger.commands.Apply"
|
|
| 15 |
defaultHandler="org.txm.treetagger.rcp.handlers.Apply"
|
|
| 16 | 16 |
id="org.txm.treetagger.commands.Apply" |
| 17 | 17 |
name="Apply"> |
| 18 | 18 |
</command> |
| 19 | 19 |
<command |
| 20 | 20 |
categoryId="org.txm.rcp.category.txm" |
| 21 |
defaultHandler="org.txm.treetagger.commands.LemmaProjection"
|
|
| 21 |
defaultHandler="org.txm.treetagger.rcp.handlers.LemmaProjection"
|
|
| 22 | 22 |
id="org.txm.treetagger.commands.PropertyProjection" |
| 23 | 23 |
name="LemmaProjection"> |
| 24 | 24 |
</command> |
| 25 | 25 |
<command |
| 26 | 26 |
categoryId="org.txm.rcp.category.txm" |
| 27 |
defaultHandler="org.txm.treetagger.commands.RemoveProperties"
|
|
| 28 |
description="Remove properties fomr corpus XML-TXM files"
|
|
| 27 |
defaultHandler="org.txm.treetagger.rcp.handlers.RemoveProperties"
|
|
| 28 |
description="Remove properties from corpus XML-TXM files"
|
|
| 29 | 29 |
id="org.txm.treetagger.commands.RemoveProperties" |
| 30 | 30 |
name="RemoveProperties"> |
| 31 | 31 |
</command> |
| ... | ... | |
| 85 | 85 |
</menu> |
| 86 | 86 |
</menuContribution> |
| 87 | 87 |
</extension> |
| 88 |
<extension |
|
| 89 |
point="org.eclipse.ui.preferencePages"> |
|
| 90 |
<page |
|
| 91 |
category="org.txm.rcp.preferences.NLPPreferencePage" |
|
| 92 |
class="org.txm.treetagger.rcp.preferences.TreeTaggerPreferencePage" |
|
| 93 |
id="org.txm.treetagger.rcp.preferences.TreeTaggerPreferencePage" |
|
| 94 |
name="%page.name.7"> |
|
| 95 |
</page> |
|
| 96 |
</extension> |
|
| 88 | 97 |
|
| 89 | 98 |
</plugin> |
| tmp/org.txm.treetagger.rcp/META-INF/MANIFEST.MF (revision 687) | ||
|---|---|---|
| 1 | 1 |
Manifest-Version: 1.0 |
| 2 | 2 |
Bundle-ManifestVersion: 2 |
| 3 | 3 |
Bundle-Name: TreeTagger |
| 4 |
Bundle-SymbolicName: TreeTagger;singleton:=true
|
|
| 4 |
Bundle-SymbolicName: org.txm.treetagger.rcp;singleton:=true
|
|
| 5 | 5 |
Bundle-Version: 1.0.0.qualifier |
| 6 |
Bundle-Activator: treetagger.Activator |
|
| 7 | 6 |
Require-Bundle: org.eclipse.ui, |
| 7 |
org.txm.treetagger.core;bundle-version="1.0.0", |
|
| 8 | 8 |
org.eclipse.core.runtime, |
| 9 | 9 |
org.eclipse.swt, |
| 10 | 10 |
org.txm.core;bundle-version="0.7.0", |
| tmp/org.txm.treetagger.rcp/src/org/txm/treetagger/rcp/handlers/Train.java (revision 687) | ||
|---|---|---|
| 1 |
package org.txm.treetagger.rcp.handlers; |
|
| 2 |
|
|
| 3 |
import java.io.BufferedOutputStream; |
|
| 4 |
import java.io.BufferedReader; |
|
| 5 |
import java.io.BufferedWriter; |
|
| 6 |
import java.io.File; |
|
| 7 |
import java.io.FileOutputStream; |
|
| 8 |
import java.io.OutputStreamWriter; |
|
| 9 |
import java.io.PrintStream; |
|
| 10 |
import java.io.PrintWriter; |
|
| 11 |
import java.util.ArrayList; |
|
| 12 |
import java.util.Arrays; |
|
| 13 |
import java.util.HashMap; |
|
| 14 |
import java.util.HashSet; |
|
| 15 |
import java.util.LinkedHashMap; |
|
| 16 |
import java.util.LinkedHashSet; |
|
| 17 |
import java.util.List; |
|
| 18 |
|
|
| 19 |
import org.eclipse.core.commands.AbstractHandler; |
|
| 20 |
import org.eclipse.core.commands.ExecutionEvent; |
|
| 21 |
import org.eclipse.core.commands.ExecutionException; |
|
| 22 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
| 23 |
import org.eclipse.core.runtime.IStatus; |
|
| 24 |
import org.eclipse.core.runtime.Status; |
|
| 25 |
import org.eclipse.jface.viewers.ISelection; |
|
| 26 |
import org.eclipse.jface.viewers.IStructuredSelection; |
|
| 27 |
import org.eclipse.ui.IWorkbenchWindow; |
|
| 28 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
| 29 |
import org.kohsuke.args4j.Option; |
|
| 30 |
import org.txm.core.preferences.TBXPreferences; |
|
| 31 |
import org.txm.core.preferences.TXMPreferences; |
|
| 32 |
import org.txm.index.core.functions.Index; |
|
| 33 |
import org.txm.index.core.functions.Line; |
|
| 34 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
| 35 |
import org.txm.rcp.utils.JobHandler; |
|
| 36 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
| 37 |
import org.txm.searchengine.cqp.CQPEngine; |
|
| 38 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
| 39 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 40 |
import org.txm.searchengine.cqp.corpus.Property; |
|
| 41 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
| 42 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
| 43 |
import org.txm.treetagger.core.preferences.TreeTaggerPreferences; |
|
| 44 |
import org.txm.utils.DeleteDir; |
|
| 45 |
import org.txm.utils.io.IOUtils; |
|
| 46 |
import org.txm.utils.logger.Log; |
|
| 47 |
import org.txm.utils.treetagger.TreeTagger; |
|
| 48 |
|
|
| 49 |
/** |
|
| 50 |
* Our sample handler extends AbstractHandler, an IHandler base class. |
|
| 51 |
* @see org.eclipse.core.commands.IHandler |
|
| 52 |
* @see org.eclipse.core.commands.AbstractHandler |
|
| 53 |
*/ |
|
| 54 |
public class Train extends AbstractHandler {
|
|
| 55 |
|
|
| 56 |
public Corpus corpus = null; |
|
| 57 |
|
|
| 58 |
@Option(name="model", usage="The model file to create", widget="CreateFile", required=true, def="fr.par") |
|
| 59 |
public File model = null; |
|
| 60 |
@Option(name="posProperty", usage="The pos property", widget="String", required=true, def="frpos") |
|
| 61 |
public String posProperty = null; |
|
| 62 |
@Option(name="sentenceTag", usage="The pos property", widget="String", required=true, def="SENT") |
|
| 63 |
public String sentenceTag = null; |
|
| 64 |
@Option(name="lemmaProperty", usage="The lemma property", widget="String", required=true, def="frlemma") |
|
| 65 |
public String lemmaProperty = null; |
|
| 66 |
@Option(name="lexique", usage="Lexicon file", widget="File", required=true, def="lexicon.txt") |
|
| 67 |
public File lexique = null; |
|
| 68 |
@Option(name="options", usage="TreeTagger supplementary options", widget="String", required=true, def="") |
|
| 69 |
public String options = null; |
|
| 70 |
|
|
| 71 |
/** |
|
| 72 |
* |
|
| 73 |
*/ |
|
| 74 |
public Object execute(ExecutionEvent event) throws ExecutionException {
|
|
| 75 |
|
|
| 76 |
|
|
| 77 |
IWorkbenchWindow window = HandlerUtil.getActiveWorkbenchWindowChecked(event); |
|
| 78 |
|
|
| 79 |
ISelection isel = window.getActivePage().getSelection(); |
|
| 80 |
if (isel instanceof IStructuredSelection) {
|
|
| 81 |
IStructuredSelection sel = (IStructuredSelection)isel; |
|
| 82 |
Object first = sel.getFirstElement(); |
|
| 83 |
if (first instanceof Corpus) {
|
|
| 84 |
corpus = (Corpus)first; |
|
| 85 |
if (ParametersDialog.open(this)) {
|
|
| 86 |
|
|
| 87 |
train(corpus, model, lexique, new String[]{posProperty, lemmaProperty}, sentenceTag, options.split(" "));
|
|
| 88 |
|
|
| 89 |
return corpus; |
|
| 90 |
} |
|
| 91 |
} |
|
| 92 |
} |
|
| 93 |
|
|
| 94 |
System.out.println("Wrong selection.");
|
|
| 95 |
return null; |
|
| 96 |
} |
|
| 97 |
|
|
| 98 |
public static void train(final Corpus corpus, final File model, final File lexique, final String[] properties, final String sentenceTag, final String[] options) {
|
|
| 99 |
|
|
| 100 |
JobHandler job = new JobHandler("Applying TreeTagger to "+corpus+" corpus.") {
|
|
| 101 |
@Override |
|
| 102 |
protected IStatus run(IProgressMonitor monitor) {
|
|
| 103 |
this.runInit(monitor); |
|
| 104 |
try {
|
|
| 105 |
File lexique2 = lexique; |
|
| 106 |
MainCorpus mainCorpus = corpus.getMainCorpus(); |
|
| 107 |
File corpusBinaryDirectory = mainCorpus.getBaseDirectory(); |
|
| 108 |
|
|
| 109 |
System.out.println("TRAIN : "+corpus+" with "+lexique2+" to create "+model+ " with properties "+Arrays.toString(properties));
|
|
| 110 |
|
|
| 111 |
if (properties == null || properties.length != 2) {
|
|
| 112 |
System.out.println("Error can't continue with selected word properties: "+Arrays.toString(properties));
|
|
| 113 |
return Status.CANCEL_STATUS; |
|
| 114 |
} |
|
| 115 |
|
|
| 116 |
for (String p : properties) {
|
|
| 117 |
Property prop = corpus.getProperty(p); |
|
| 118 |
if (prop == null) {
|
|
| 119 |
System.out.println("Missing property in corpus: "+p);
|
|
| 120 |
return Status.CANCEL_STATUS; |
|
| 121 |
} |
|
| 122 |
} |
|
| 123 |
|
|
| 124 |
Property pos = corpus.getProperty(properties[0]); |
|
| 125 |
Property lemma = corpus.getProperty(properties[1]); |
|
| 126 |
|
|
| 127 |
// Prepare temporary directory |
|
| 128 |
File treetaggerSrcDirectory = new File(mainCorpus.getBaseDirectory(), "treetagger"); |
|
| 129 |
DeleteDir.deleteDirectory(treetaggerSrcDirectory); |
|
| 130 |
treetaggerSrcDirectory.mkdirs(); |
|
| 131 |
|
|
| 132 |
HashMap<String, HashSet<String>> simplified_lexicon = null; |
|
| 133 |
HashMap<String, HashSet<String>> simplified_lexicon_errors = null; |
|
| 134 |
int error_counter = 0; |
|
| 135 |
// Create Lexicon file from an Index |
|
| 136 |
if (lexique2 == null || !lexique2.exists()) {
|
|
| 137 |
System.out.println("Warning: no lexicon file or given lexicon file does not exist ("+lexique2+"). Using corpus Index...");
|
|
| 138 |
|
|
| 139 |
File lexiconfile = new File(treetaggerSrcDirectory, "lexicon.txt"); |
|
| 140 |
List<Property> corpusProperties = new ArrayList<Property>(); |
|
| 141 |
corpusProperties.add(mainCorpus.getProperty("word"));
|
|
| 142 |
for (String p : properties) {
|
|
| 143 |
Property prop = mainCorpus.getProperty(p); |
|
| 144 |
if (prop == null) {
|
|
| 145 |
System.out.println("Error, a property is missing: "+p);
|
|
| 146 |
return Status.CANCEL_STATUS; |
|
| 147 |
} |
|
| 148 |
corpusProperties.add(prop); |
|
| 149 |
} |
|
| 150 |
Index index = new Index(mainCorpus); |
|
| 151 |
index.setParameters(new Query("[]"), corpusProperties, null, null, null, null);
|
|
| 152 |
index.compute(monitor); |
|
| 153 |
List<Line> lines = index.getAllLines(); |
|
| 154 |
LinkedHashMap<String, ArrayList<String>> lex = new LinkedHashMap<String, ArrayList<String>>(); |
|
| 155 |
HashMap<String, HashSet<String>> allPosValues = new HashMap<String, HashSet<String>>(); |
|
| 156 |
for (Line l : lines) {
|
|
| 157 |
List<List<String>> values = l.getUnitsProperties(); |
|
| 158 |
String form = values.get(0).get(0); |
|
| 159 |
if (!lex.containsKey(form)) {
|
|
| 160 |
ArrayList<String> pairs = new ArrayList<String>(); |
|
| 161 |
HashSet<String> posValues = new HashSet<String>(); |
|
| 162 |
|
|
| 163 |
allPosValues.put(form, posValues); |
|
| 164 |
lex.put(form, pairs); |
|
| 165 |
} |
|
| 166 |
ArrayList<String> pairs = lex.get(form); |
|
| 167 |
HashSet<String> posValues = allPosValues.get(form); |
|
| 168 |
String posValue = values.get(1).get(0); |
|
| 169 |
String lemmaValue = values.get(2).get(0); |
|
| 170 |
if (posValues.contains(posValue)) {
|
|
| 171 |
|
|
| 172 |
} else {
|
|
| 173 |
posValues.add(posValue); |
|
| 174 |
pairs.add(posValue); |
|
| 175 |
pairs.add(lemmaValue); |
|
| 176 |
} |
|
| 177 |
} |
|
| 178 |
|
|
| 179 |
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(lexiconfile), "UTF-8")); |
|
| 180 |
for (String form : lex.keySet()) {
|
|
| 181 |
|
|
| 182 |
writer.write(form); |
|
| 183 |
boolean tab = true; |
|
| 184 |
for (String v : lex.get(form)) {
|
|
| 185 |
if (tab) writer.write("\t"+v);
|
|
| 186 |
else writer.write(" "+v);
|
|
| 187 |
|
|
| 188 |
tab = !tab; |
|
| 189 |
} |
|
| 190 |
writer.write("\n");
|
|
| 191 |
} |
|
| 192 |
writer.close(); |
|
| 193 |
lexique2 = lexiconfile; |
|
| 194 |
} else { // diagnose lexicon content
|
|
| 195 |
simplified_lexicon = new HashMap<String, HashSet<String>>(); |
|
| 196 |
simplified_lexicon_errors = new HashMap<String, HashSet<String>>(); |
|
| 197 |
BufferedReader reader = IOUtils.getReader(lexique2); |
|
| 198 |
String line = reader.readLine(); |
|
| 199 |
while (line != null) {
|
|
| 200 |
String[] split = line.split("\t", 2);
|
|
| 201 |
HashSet<String> posValues = new HashSet<String>(); |
|
| 202 |
simplified_lexicon.put(split[0], posValues); |
|
| 203 |
for (String poslemme : split[1].split("\t")) {
|
|
| 204 |
String[] split2 = poslemme.split(" ", 2);
|
|
| 205 |
posValues.add(split2[0]); |
|
| 206 |
} |
|
| 207 |
line = reader.readLine(); |
|
| 208 |
} |
|
| 209 |
reader.close(); |
|
| 210 |
} |
|
| 211 |
|
|
| 212 |
|
|
| 213 |
// create TT SRC file from CWB indexes |
|
| 214 |
|
|
| 215 |
File ttSrcFile = new File(treetaggerSrcDirectory, mainCorpus.getName()+".tt"); |
|
| 216 |
System.out.println("TT SRC file: "+ttSrcFile.getAbsolutePath());
|
|
| 217 |
BufferedOutputStream fos = new BufferedOutputStream(new FileOutputStream(ttSrcFile)); |
|
| 218 |
PrintStream ps = new PrintStream(fos); |
|
| 219 |
LinkedHashSet<Integer> positions = new LinkedHashSet<Integer>(); |
|
| 220 |
Property word = corpus.getProperty("word");
|
|
| 221 |
AbstractCqiClient CQI = CQPEngine.getCqiClient(); |
|
| 222 |
for (Match m : corpus.getMatches()) {
|
|
| 223 |
for (int i = m.getStart() ; i <= m.getEnd() ; i++) { // end match must be included
|
|
| 224 |
positions.add(i); |
|
| 225 |
|
|
| 226 |
if (positions.size() >= 1000) { // avoid too big array
|
|
| 227 |
int[] positions_array = new int[positions.size()]; |
|
| 228 |
int ip = 0; |
|
| 229 |
for (int p : positions) positions_array[ip++] = p; |
|
| 230 |
String[] words = CQI.cpos2Str(word.getQualifiedName(), positions_array); |
|
| 231 |
String[] values = CQI.cpos2Str(pos.getQualifiedName(), positions_array); |
|
| 232 |
|
|
| 233 |
for (int iW = 0 ; iW < words.length ; iW++) {
|
|
| 234 |
String w = words[iW]; |
|
| 235 |
if (w != null) {
|
|
| 236 |
String s = w+"\t"+values[iW]; |
|
| 237 |
ps.println(s); |
|
| 238 |
|
|
| 239 |
if (simplified_lexicon != null) { // check given lexicon
|
|
| 240 |
if (simplified_lexicon.containsKey(w)) {
|
|
| 241 |
if (!simplified_lexicon.get(w).contains(values[iW])) {
|
|
| 242 |
//System.out.println("Lexicon error: cannot find pos="+values[iW]+" for form="+w);
|
|
| 243 |
if (!simplified_lexicon_errors.containsKey(w)) simplified_lexicon_errors.put(w, new HashSet<String>()); |
|
| 244 |
HashSet<String> error_values = simplified_lexicon_errors.get(w); |
|
| 245 |
error_values.add(values[iW]); |
|
| 246 |
error_counter++; |
|
| 247 |
} |
|
| 248 |
} else {
|
|
| 249 |
//System.out.println("Lexicon error: cannot find form="+w);
|
|
| 250 |
if (!simplified_lexicon_errors.containsKey(w)) simplified_lexicon_errors.put(w, new HashSet<String>()); |
|
| 251 |
HashSet<String> error_values = simplified_lexicon_errors.get(w); |
|
| 252 |
error_values.add("#"+values[iW]);
|
|
| 253 |
error_counter++; |
|
| 254 |
} |
|
| 255 |
} |
|
| 256 |
} |
|
| 257 |
} |
|
| 258 |
positions.clear(); |
|
| 259 |
} |
|
| 260 |
} |
|
| 261 |
} |
|
| 262 |
if (positions.size() > 0) { // write last words
|
|
| 263 |
int[] positions_array = new int[positions.size()]; |
|
| 264 |
int ip = 0; |
|
| 265 |
for (int p : positions) positions_array[ip++] = p; |
|
| 266 |
String[] words = CQI.cpos2Str(word.getQualifiedName(), positions_array); |
|
| 267 |
String[] values = CQI.cpos2Str(pos.getQualifiedName(), positions_array); |
|
| 268 |
|
|
| 269 |
for (int iW = 0 ; iW < words.length ; iW++) {
|
|
| 270 |
String w = words[iW]; |
|
| 271 |
if (w != null) {
|
|
| 272 |
String s = w+"\t"+values[iW]; |
|
| 273 |
ps.println(s); |
|
| 274 |
} |
|
| 275 |
} |
|
| 276 |
positions.clear(); |
|
| 277 |
} |
|
| 278 |
ps.close(); |
|
| 279 |
|
|
| 280 |
if (simplified_lexicon_errors != null && simplified_lexicon_errors.size() > 0) {
|
|
| 281 |
File error_file = new File(treetaggerSrcDirectory, "errors.txt"); |
|
| 282 |
PrintWriter errorwriter = IOUtils.getWriter(error_file); |
|
| 283 |
int c = 0; |
|
| 284 |
System.out.println("Warning, lexicon errors ("+error_counter+") found with words:");
|
|
| 285 |
for (String w : simplified_lexicon_errors.keySet()) {
|
|
| 286 |
errorwriter.println(w+"="+simplified_lexicon_errors.get(w)); |
|
| 287 |
if (c < 10) {
|
|
| 288 |
System.out.println(w+"="+simplified_lexicon_errors.get(w)); |
|
| 289 |
c++; |
|
| 290 |
if (c == 10) System.out.println("... errors display is trucated, see "+error_file.getAbsolutePath());
|
|
| 291 |
} |
|
| 292 |
} |
|
| 293 |
errorwriter.close(); |
|
| 294 |
//System.out.println("Cannot apply train-treetagger if lexicon is missing words and pos.");
|
|
| 295 |
//return Status.CANCEL_STATUS; |
|
| 296 |
File lexique3 = new File(lexique2.getParentFile(), lexique2.getName()+".fix"); |
|
| 297 |
BufferedReader reader = IOUtils.getReader(lexique2); |
|
| 298 |
PrintWriter writer = IOUtils.getWriter(lexique3); |
|
| 299 |
String line = reader.readLine(); |
|
| 300 |
while (line != null) {
|
|
| 301 |
String w = line.split("\t", 2)[0];
|
|
| 302 |
|
|
| 303 |
if (simplified_lexicon_errors.containsKey(w)) {
|
|
| 304 |
for (String p : simplified_lexicon_errors.get(w)) {
|
|
| 305 |
if (!p.startsWith("#"))
|
|
| 306 |
line += ("\t"+p+" <no_lemma>"); // append missing value
|
|
| 307 |
} |
|
| 308 |
simplified_lexicon_errors.remove(w); |
|
| 309 |
} |
|
| 310 |
|
|
| 311 |
writer.println(line); |
|
| 312 |
line = reader.readLine(); |
|
| 313 |
} |
|
| 314 |
|
|
| 315 |
// write missing words |
|
| 316 |
for (String w2 : simplified_lexicon_errors.keySet()) {
|
|
| 317 |
writer.print(w2); |
|
| 318 |
for (String p : simplified_lexicon_errors.get(w2)) {
|
|
| 319 |
writer.print("\t"+p+" <no_lemma>");
|
|
| 320 |
} |
|
| 321 |
writer.println("");
|
|
| 322 |
} |
|
| 323 |
|
|
| 324 |
reader.close(); |
|
| 325 |
writer.close(); |
|
| 326 |
System.out.println("Adding words to a temporary lexicon: "+lexique3);
|
|
| 327 |
lexique2 = lexique3; |
|
| 328 |
} |
|
| 329 |
|
|
| 330 |
// Create open class file : contains all pos values |
|
| 331 |
File openclassfile = new File(treetaggerSrcDirectory, "openclasses.txt"); |
|
| 332 |
PrintWriter openClassFileWriter = IOUtils.getWriter(openclassfile); |
|
| 333 |
|
|
| 334 |
// Lexicon poslexicon = corpus.getLexicon(pos); |
|
| 335 |
// String[] posValues = poslexicon.getForms(); |
|
| 336 |
// for (int iV = 0 ; iV < posValues.length ; iV++) {
|
|
| 337 |
// if (iV == 0) openClassFileWriter.print(posValues[iV]); |
|
| 338 |
// else openClassFileWriter.print(" "+posValues[iV]);
|
|
| 339 |
// } |
|
| 340 |
openClassFileWriter.close(); |
|
| 341 |
|
|
| 342 |
// Call treetagger-train |
|
| 343 |
if (ttSrcFile.exists() && lexique2.exists() && openclassfile.exists()) {
|
|
| 344 |
System.out.println("Running ");
|
|
| 345 |
String treetaggerBinDirectory = new File(TXMPreferences.getString(TreeTaggerPreferences.INSTALL_PATH, TBXPreferences.PREFERENCES_NODE), "bin").getAbsolutePath(); |
|
| 346 |
if (!treetaggerBinDirectory.endsWith("/")) treetaggerBinDirectory += "/";
|
|
| 347 |
|
|
| 348 |
TreeTagger tt = new TreeTagger(treetaggerBinDirectory, options); |
|
| 349 |
tt.settoken(); |
|
| 350 |
tt.setquiet(); |
|
| 351 |
tt.setlemma(); |
|
| 352 |
tt.setsgml(); |
|
| 353 |
tt.setst(sentenceTag); |
|
| 354 |
tt.setproto(); |
|
| 355 |
tt.setutf8(); |
|
| 356 |
tt.debug(true); |
|
| 357 |
tt.traintreetagger(lexique2.getAbsolutePath(), openclassfile.getAbsolutePath(), ttSrcFile.getAbsolutePath(), model.getAbsolutePath()); |
|
| 358 |
|
|
| 359 |
System.out.println("Done: "+model.getAbsolutePath());
|
|
| 360 |
} else {
|
|
| 361 |
System.out.println("Aborting.");
|
|
| 362 |
} |
|
| 363 |
|
|
| 364 |
return Status.OK_STATUS; |
|
| 365 |
} catch (Exception e) {
|
|
| 366 |
System.out.println("Error while training TT: "+e);
|
|
| 367 |
Log.printStackTrace(e); |
|
| 368 |
} |
|
| 369 |
return Status.CANCEL_STATUS; |
|
| 370 |
} |
|
| 371 |
}; |
|
| 372 |
job.schedule(); |
|
| 373 |
} |
|
| 374 |
} |
|
| 0 | 375 | |
| tmp/org.txm.treetagger.rcp/src/org/txm/treetagger/rcp/handlers/Apply.java (revision 687) | ||
|---|---|---|
| 1 |
package org.txm.treetagger.rcp.handlers; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.io.FileFilter; |
|
| 5 |
import java.util.Arrays; |
|
| 6 |
import java.util.HashMap; |
|
| 7 |
|
|
| 8 |
import org.eclipse.core.commands.AbstractHandler; |
|
| 9 |
import org.eclipse.core.commands.ExecutionEvent; |
|
| 10 |
import org.eclipse.core.commands.ExecutionException; |
|
| 11 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
| 12 |
import org.eclipse.core.runtime.IStatus; |
|
| 13 |
import org.eclipse.core.runtime.Status; |
|
| 14 |
import org.eclipse.jface.viewers.ISelection; |
|
| 15 |
import org.eclipse.jface.viewers.IStructuredSelection; |
|
| 16 |
import org.eclipse.ui.IWorkbenchWindow; |
|
| 17 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
| 18 |
import org.kohsuke.args4j.Option; |
|
| 19 |
import org.txm.rcp.commands.workspace.UpdateCorpus; |
|
| 20 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
| 21 |
import org.txm.rcp.utils.JobHandler; |
|
| 22 |
import org.txm.scripts.teitxm.Annotate; |
|
| 23 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
| 24 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 25 |
|
|
| 26 |
/** |
|
| 27 |
* Our sample handler extends AbstractHandler, an IHandler base class. |
|
| 28 |
* @see org.eclipse.core.commands.IHandler |
|
| 29 |
* @see org.eclipse.core.commands.AbstractHandler |
|
| 30 |
*/ |
|
| 31 |
public class Apply extends AbstractHandler {
|
|
| 32 |
|
|
| 33 |
@Option(name="model", usage="Model file", widget="File", required=true, def="model.par") |
|
| 34 |
public File model = null; |
|
| 35 |
@Option(name="posProperty", usage="The pos property", widget="String", required=true, def="frpos") |
|
| 36 |
public String posProperty = null; |
|
| 37 |
@Option(name="lemmaProperty", usage="The lemma property", widget="String", required=true, def="frlemma") |
|
| 38 |
public String lemmaProperty = null; |
|
| 39 |
@Option(name="options", usage="TreeTagger supplementary options", widget="String", required=true, def="") |
|
| 40 |
public String options = null; |
|
| 41 |
|
|
| 42 |
/** |
|
| 43 |
* |
|
| 44 |
*/ |
|
| 45 |
public Object execute(ExecutionEvent event) throws ExecutionException {
|
|
| 46 |
Corpus corpus = null; |
|
| 47 |
IWorkbenchWindow window = HandlerUtil.getActiveWorkbenchWindowChecked(event); |
|
| 48 |
|
|
| 49 |
ISelection isel = window.getActivePage().getSelection(); |
|
| 50 |
if (isel instanceof IStructuredSelection) {
|
|
| 51 |
IStructuredSelection sel = (IStructuredSelection)isel; |
|
| 52 |
Object first = sel.getFirstElement(); |
|
| 53 |
if (first instanceof Corpus) {
|
|
| 54 |
corpus = (Corpus)first; |
|
| 55 |
if (ParametersDialog.open(this)) {
|
|
| 56 |
apply(corpus, model, new String[]{posProperty, lemmaProperty}, options.split(" "));
|
|
| 57 |
return corpus; |
|
| 58 |
} |
|
| 59 |
} |
|
| 60 |
} |
|
| 61 |
|
|
| 62 |
System.out.println("Wrong selection.");
|
|
| 63 |
return null; |
|
| 64 |
} |
|
| 65 |
|
|
| 66 |
public static void apply(Corpus corpus, final File model, final String[] properties, final String[] options) {
|
|
| 67 |
final MainCorpus mainCorpus = corpus.getMainCorpus(); |
|
| 68 |
final File corpusBinaryDirectory = mainCorpus.getBaseDirectory(); |
|
| 69 |
final File txmDirectory = new File(corpusBinaryDirectory, "txm/"+mainCorpus.getName()); |
|
| 70 |
|
|
| 71 |
if (!txmDirectory.exists()) {
|
|
| 72 |
System.out.println("Can't apply TreeTagger to a corpus with no XML-TXM files.");
|
|
| 73 |
} |
|
| 74 |
|
|
| 75 |
final File[] files = txmDirectory.listFiles(new FileFilter() {
|
|
| 76 |
@Override |
|
| 77 |
public boolean accept(File file) {
|
|
| 78 |
return file.isFile() && file.canWrite() && file.getName().endsWith(".xml");
|
|
| 79 |
} |
|
| 80 |
}); |
|
| 81 |
|
|
| 82 |
if (files == null || files.length == 0) {
|
|
| 83 |
System.out.println("Can't apply TreeTagger to a corpus with no XML-TXM files in "+txmDirectory);
|
|
| 84 |
} |
|
| 85 |
|
|
| 86 |
String lang = model.getName(); |
|
| 87 |
if (!lang.endsWith(".par")) {
|
|
| 88 |
System.out.println("Model file name must ends with the '.par' extension");
|
|
| 89 |
return; |
|
| 90 |
} |
|
| 91 |
lang = lang.substring(0, lang.indexOf(".par"));
|
|
| 92 |
|
|
| 93 |
final HashMap<String, String> hash = new HashMap<String, String>(); |
|
| 94 |
for (File txmFile : files) {
|
|
| 95 |
hash.put(txmFile.getName(), lang); |
|
| 96 |
} |
|
| 97 |
|
|
| 98 |
for (int i = 0 ; i < properties.length ; i++) properties[i] = properties[i].trim(); |
|
| 99 |
|
|
| 100 |
System.out.println("APPLY : "+model+" to "+corpus+" updating "+Arrays.toString(properties)+ " with options "+Arrays.toString(options));
|
|
| 101 |
JobHandler job = new JobHandler("Applying TreeTagger to "+corpus+" corpus.") {
|
|
| 102 |
@Override |
|
| 103 |
protected IStatus run(IProgressMonitor monitor) {
|
|
| 104 |
this.runInit(monitor); |
|
| 105 |
Annotate annotator = new Annotate(); |
|
| 106 |
annotator.setModelsDirectory(model.getParentFile()); |
|
| 107 |
annotator.setDebug(); |
|
| 108 |
if (!annotator.run(corpusBinaryDirectory, txmDirectory, hash, true, properties, options)) {
|
|
| 109 |
System.out.println("Fail to apply TreeTagger with "+txmDirectory+" files.");
|
|
| 110 |
return Status.CANCEL_STATUS; |
|
| 111 |
} |
|
| 112 |
System.out.println("Done. Updating corpus...");
|
|
| 113 |
|
|
| 114 |
if (UpdateCorpus.update(mainCorpus) == null) {
|
|
| 115 |
System.out.println("Fail to update corpus indexes and editions.");
|
|
| 116 |
} |
|
| 117 |
System.out.println("Done.");
|
|
| 118 |
return Status.OK_STATUS;//frppos |
|
| 119 |
} |
|
| 120 |
}; |
|
| 121 |
job.schedule(); |
|
| 122 |
} |
|
| 123 |
} |
|
| 0 | 124 | |
| tmp/org.txm.treetagger.rcp/src/org/txm/treetagger/rcp/handlers/LemmaProjection.java (revision 687) | ||
|---|---|---|
| 1 |
package org.txm.treetagger.rcp.handlers; |
|
| 2 |
|
|
| 3 |
import java.io.BufferedReader; |
|
| 4 |
import java.io.File; |
|
| 5 |
import java.io.FileFilter; |
|
| 6 |
import java.io.PrintWriter; |
|
| 7 |
import java.util.Arrays; |
|
| 8 |
import java.util.Collections; |
|
| 9 |
import java.util.HashSet; |
|
| 10 |
import java.util.LinkedHashMap; |
|
| 11 |
import java.util.LinkedHashSet; |
|
| 12 |
|
|
| 13 |
import org.apache.commons.lang.StringUtils; |
|
| 14 |
import org.eclipse.core.commands.AbstractHandler; |
|
| 15 |
import org.eclipse.core.commands.ExecutionEvent; |
|
| 16 |
import org.eclipse.core.commands.ExecutionException; |
|
| 17 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
| 18 |
import org.eclipse.core.runtime.IStatus; |
|
| 19 |
import org.eclipse.core.runtime.Status; |
|
| 20 |
import org.eclipse.jface.viewers.ISelection; |
|
| 21 |
import org.eclipse.jface.viewers.IStructuredSelection; |
|
| 22 |
import org.eclipse.ui.IWorkbenchWindow; |
|
| 23 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
| 24 |
import org.kohsuke.args4j.Option; |
|
| 25 |
import org.txm.Toolbox; |
|
| 26 |
import org.txm.core.preferences.TBXPreferences; |
|
| 27 |
import org.txm.core.preferences.TXMPreferences; |
|
| 28 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
| 29 |
import org.txm.rcp.utils.JobHandler; |
|
| 30 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
| 31 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
| 32 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
| 33 |
import org.txm.searchengine.cqp.corpus.Property; |
|
| 34 |
import org.txm.stat.utils.ConsoleProgressBar; |
|
| 35 |
import org.txm.utils.DeleteDir; |
|
| 36 |
import org.txm.utils.io.FileCopy; |
|
| 37 |
import org.txm.utils.io.IOUtils; |
|
| 38 |
import org.txm.utils.Tuple; |
|
| 39 |
|
|
| 40 |
/** |
|
| 41 |
* Our sample handler extends AbstractHandler, an IHandler base class. |
|
| 42 |
* @see org.eclipse.core.commands.IHandler |
|
| 43 |
* @see org.eclipse.core.commands.AbstractHandler |
|
| 44 |
*/ |
|
| 45 |
public class LemmaProjection extends AbstractHandler {
|
|
| 46 |
|
|
| 47 |
protected static final String EXTRA = "extra"; |
|
| 48 |
@Option(name="dictionary", usage="TSV Dictionary file with form, msd, lemma, source columns", widget="File", required=true, def="frolex.tsv") |
|
| 49 |
public File dictionary = null; |
|
| 50 |
@Option(name="extrarules", usage="form+pos rules files", widget="File", required=false, def="extrarules.tsv") |
|
| 51 |
public File extrarules = null; |
|
| 52 |
@Option(name="posproperty", usage="The lexicon property to read", widget="String", required=true, def="frpos") |
|
| 53 |
public String posproperty = null; |
|
| 54 |
@Option(name="lemmaproperty", usage="The property to create/update in the corpus", widget="String", required=true, def="plemma") |
|
| 55 |
public String lemmaproperty = null; |
|
| 56 |
@Option(name="formAsLemmaPosList", usage="Pos values lemma exceptions", widget="String", required=false, def="NOMPro") |
|
| 57 |
public String formAsLemmaPosList = null; |
|
| 58 |
@Option(name="sourcePriorityList", usage="The property to create/update in the corpus", widget="String", required=true, def="TL") |
|
| 59 |
public String sourcePriorityList = null; |
|
| 60 |
|
|
| 61 |
/** |
|
| 62 |
* |
|
| 63 |
*/ |
|
| 64 |
public Object execute(ExecutionEvent event) throws ExecutionException {
|
|
| 65 |
Corpus corpus = null; |
|
| 66 |
IWorkbenchWindow window = HandlerUtil.getActiveWorkbenchWindowChecked(event); |
|
| 67 |
|
|
| 68 |
ISelection isel = window.getActivePage().getSelection(); |
|
| 69 |
if (isel instanceof IStructuredSelection) {
|
|
| 70 |
IStructuredSelection sel = (IStructuredSelection)isel; |
|
| 71 |
Object first = sel.getFirstElement(); |
|
| 72 |
if (first instanceof Corpus) {
|
|
| 73 |
corpus = (Corpus)first; |
|
| 74 |
if (ParametersDialog.open(this)) {
|
|
| 75 |
LinkedHashSet<String> formAsLemmaPosSet = new LinkedHashSet<String>(); |
|
| 76 |
formAsLemmaPosSet.addAll(Arrays.asList(formAsLemmaPosList.split(",")));
|
|
| 77 |
LinkedHashSet<String> sourcePrioritySet = new LinkedHashSet<String>(); |
|
| 78 |
if (extrarules != null && extrarules.exists()) sourcePrioritySet.add(EXTRA); // extra must be the first source |
|
| 79 |
sourcePrioritySet.addAll(Arrays.asList(sourcePriorityList.split(",")));
|
|
| 80 |
|
|
| 81 |
System.out.println("formAsLemmaPosSet="+formAsLemmaPosSet);
|
|
| 82 |
System.out.println("sourcePrioritySet="+sourcePrioritySet);
|
|
| 83 |
apply(corpus, dictionary, extrarules, posproperty, lemmaproperty, formAsLemmaPosSet, sourcePrioritySet); |
|
| 84 |
return corpus; |
|
| 85 |
} |
|
| 86 |
} |
|
| 87 |
} |
|
| 88 |
|
|
| 89 |
System.out.println("Wrong selection.");
|
|
| 90 |
return null; |
|
| 91 |
} |
|
| 92 |
|
|
| 93 |
public static void apply(final Corpus corpus, final File dictionary, final File extrarules, final String posproperty, |
|
| 94 |
final String targetproperty, final LinkedHashSet<String> formAsLemmaPosList, final LinkedHashSet<String> sourceprioritylist) {
|
|
| 95 |
final MainCorpus mainCorpus = corpus.getMainCorpus(); |
|
| 96 |
final File corpusBinaryDirectory = mainCorpus.getBaseDirectory(); |
|
| 97 |
final File txmDirectory = new File(corpusBinaryDirectory, "txm/"+mainCorpus.getName()); |
|
| 98 |
|
|
| 99 |
if (!txmDirectory.exists()) {
|
|
| 100 |
System.out.println("Can't process a corpus with no XML-TXM files directory: "+txmDirectory);
|
|
| 101 |
return; |
|
| 102 |
} |
|
| 103 |
|
|
| 104 |
final File[] files = txmDirectory.listFiles(new FileFilter() {
|
|
| 105 |
@Override |
|
| 106 |
public boolean accept(File file) {
|
|
| 107 |
return file.isFile() && file.canWrite() && file.getName().endsWith(".xml");
|
|
| 108 |
} |
|
| 109 |
}); |
|
| 110 |
|
|
| 111 |
Property pos = null; |
|
| 112 |
try {
|
|
| 113 |
pos = mainCorpus.getProperty(posproperty); |
|
| 114 |
} catch (CqiClientException e1) {
|
|
| 115 |
// TODO Auto-generated catch block |
|
| 116 |
e1.printStackTrace(); |
|
| 117 |
} |
|
| 118 |
if (pos == null) {
|
|
| 119 |
System.out.println("No pos property found with name="+posproperty);
|
|
| 120 |
return; |
|
| 121 |
} |
|
| 122 |
|
|
| 123 |
if (files == null || files.length == 0) {
|
|
| 124 |
System.out.println("Can't process a corpus with no XML-TXM files in "+txmDirectory);
|
|
| 125 |
return; |
|
| 126 |
} |
|
| 127 |
|
|
| 128 |
System.out.println("APPLYING : "+dictionary+" to "+mainCorpus+": creating/updating "+targetproperty+ " property with lexicon "+dictionary);
|
|
| 129 |
JobHandler job = new JobHandler("Creating/Updating "+targetproperty+" property.") {
|
|
| 130 |
@Override |
|
| 131 |
protected IStatus run(IProgressMonitor monitor) {
|
|
| 132 |
this.runInit(monitor); |
|
| 133 |
Tuple t; |
|
| 134 |
LinkedHashMap<String, LinkedHashMap<String, LinkedHashMap<String, String>>> rules = new LinkedHashMap<String, LinkedHashMap<String, LinkedHashMap<String, String>>>(); |
|
| 135 |
HashSet<String> formAsLemmaPosSet = new HashSet<String>(); |
|
| 136 |
try {
|
|
| 137 |
// load rules |
|
| 138 |
BufferedReader reader = IOUtils.getReader(dictionary); |
|
| 139 |
String line = reader.readLine(); |
|
| 140 |
while (line != null) {
|
|
| 141 |
String[] splitTab = line.split("\t");
|
|
| 142 |
if (splitTab.length != 5) {
|
|
| 143 |
System.out.println("Error in dictionary files with line='"+line+"': length is not 5. Found: "+Arrays.toString(splitTab));
|
|
| 144 |
line = reader.readLine(); |
|
| 145 |
reader.close(); |
|
| 146 |
return Status.CANCEL_STATUS; |
|
| 147 |
} |
|
| 148 |
String form = splitTab[0]; |
|
| 149 |
String pos = splitTab[1];//.replace("<no_pos>|", "").replace("|<no_pos>|", "").replace("|<no_pos>", "");
|
|
| 150 |
String lemma = splitTab[2];//.replace("<no_lemma>|", "").replace("|<no_lemma>|", "").replace("|<no_lemma>", "");
|
|
| 151 |
String source = splitTab[3]; |
|
| 152 |
|
|
| 153 |
if (! rules.containsKey(form)) rules.put(form, new LinkedHashMap<String, LinkedHashMap<String, String>>()); |
|
| 154 |
LinkedHashMap<String, LinkedHashMap<String, String>> posHash = rules.get(form); |
|
| 155 |
|
|
| 156 |
if (!lemma.equals("<no_lemma>")) {
|
|
| 157 |
if (!posHash.containsKey(pos)) posHash.put(pos, new LinkedHashMap<String, String>()); |
|
| 158 |
LinkedHashMap<String, String> sourceHash = posHash.get(pos); |
|
| 159 |
|
|
| 160 |
sourceHash.put(source, lemma); |
|
| 161 |
} |
|
| 162 |
line = reader.readLine(); |
|
| 163 |
} |
|
| 164 |
reader.close(); |
|
| 165 |
System.out.println("Dictionary rules loaded: "+rules.size());
|
|
| 166 |
|
|
| 167 |
if (extrarules.exists()) {
|
|
| 168 |
reader = IOUtils.getReader(extrarules); |
|
| 169 |
line = reader.readLine(); |
|
| 170 |
while (line != null) {
|
|
| 171 |
String[] splitTab = line.split("\t");
|
|
| 172 |
if (splitTab.length != 3) {
|
|
| 173 |
System.out.println("Error in extra rule files with line='"+line+"': length is not 3.");
|
|
| 174 |
line = reader.readLine(); |
|
| 175 |
continue; |
|
| 176 |
} |
|
| 177 |
String form = splitTab[0]; |
|
| 178 |
String pos = splitTab[1]; |
|
| 179 |
String lemma = splitTab[2]; |
|
| 180 |
if (! rules.containsKey(form)) rules.put(form, new LinkedHashMap<String, LinkedHashMap<String, String>>()); |
|
| 181 |
LinkedHashMap<String, LinkedHashMap<String, String>> posHash = rules.get(form); |
|
| 182 |
|
|
| 183 |
if (!posHash.containsKey(pos)) posHash.put(pos, new LinkedHashMap<String, String>()); |
|
| 184 |
LinkedHashMap<String, String> sourceHash = posHash.get(pos); |
|
| 185 |
sourceHash.put(EXTRA, lemma); |
|
| 186 |
|
|
| 187 |
} |
|
| 188 |
reader.close(); |
|
| 189 |
System.out.println("Dictionary extra rules loaded: "+rules.size());
|
|
| 190 |
} else {
|
|
| 191 |
System.out.println("No extra rule loaded.");
|
|
| 192 |
} |
|
| 193 |
|
|
| 194 |
PrintWriter writer = IOUtils.getWriter("/tmp/rules.txt");
|
|
| 195 |
for (String k : rules.keySet()) {
|
|
| 196 |
writer.println("FORM="+k);
|
|
| 197 |
LinkedHashMap<String, LinkedHashMap<String, String>> rules2 = rules.get(k); |
|
| 198 |
for (String k2 : rules2.keySet()) {
|
|
| 199 |
writer.println(" POS="+k2);
|
|
| 200 |
LinkedHashMap<String, String> rules3 = rules2.get(k2); |
|
| 201 |
for (String k3 : rules3.keySet()) {
|
|
| 202 |
writer.println(" SOURCE="+k3);
|
|
| 203 |
String ls2 = rules3.get(k3); |
|
| 204 |
writer.println(" LEMMA="+ls2);
|
|
| 205 |
} |
|
| 206 |
} |
|
| 207 |
} |
|
| 208 |
writer.close(); |
|
| 209 |
System.out.println("RULE DUMP: /tmp/rules.txt");
|
|
| 210 |
|
|
| 211 |
// load rules |
|
| 212 |
for (String s : formAsLemmaPosList) {
|
|
| 213 |
formAsLemmaPosSet.add(s); |
|
| 214 |
} |
|
| 215 |
System.out.println("POS exception rules loaded: "+formAsLemmaPosSet.size());
|
|
| 216 |
|
|
| 217 |
// save previous version of XML-TXM files |
|
| 218 |
File previousXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_previous"); |
|
| 219 |
DeleteDir.deleteDirectory(previousXMLTXMDirectory); |
|
| 220 |
FileCopy.copyFiles(txmDirectory, previousXMLTXMDirectory); |
|
| 221 |
|
|
| 222 |
// work |
|
| 223 |
File noMatchsFile = new File(TXMPreferences.getString(TBXPreferences.USER_TXM_HOME, TBXPreferences.PREFERENCES_NODE), "results/nomatch.txt"); |
|
| 224 |
HashSet<String> noMatchsSet = new HashSet<String>(); |
|
| 225 |
ConsoleProgressBar cpb = new ConsoleProgressBar(files.length); |
|
| 226 |
for (File xmlFile : files) {
|
|
| 227 |
cpb.tick(); |
|
| 228 |
XMLLemmaProjection p = new XMLLemmaProjection(xmlFile, rules, formAsLemmaPosSet, sourceprioritylist, posproperty, targetproperty); |
|
| 229 |
File tmpFile = new File(xmlFile.getParentFile(), "tmp_"+xmlFile.getName()); |
|
| 230 |
if (p.process(tmpFile)) {
|
|
| 231 |
if (xmlFile.delete() && tmpFile.renameTo(xmlFile)) {
|
|
| 232 |
// ok |
|
| 233 |
} else {
|
|
| 234 |
System.out.println("Error during lemma projection: can't replace XML-TXM file: "+xmlFile);
|
|
| 235 |
return Status.CANCEL_STATUS; |
|
| 236 |
} |
|
| 237 |
} else {
|
|
| 238 |
System.out.println("Error during lemma projection. Aborting.");
|
|
| 239 |
return Status.CANCEL_STATUS; |
|
| 240 |
} |
|
| 241 |
if (p.getNoMatchValues().size() > 0) {
|
|
| 242 |
System.out.println("No matchs found with file "+xmlFile.getName()+": "+p.getNoMatchValues());
|
|
| 243 |
noMatchsSet.addAll(p.getNoMatchValues()); |
|
| 244 |
} |
|
| 245 |
} |
|
| 246 |
|
|
| 247 |
if (noMatchsSet.size() > 0) {
|
|
| 248 |
System.out.println("Missing lemma values report saved in: "+noMatchsFile);
|
|
| 249 |
IOUtils.write(noMatchsFile, StringUtils.join(noMatchsSet, "\n")); |
|
| 250 |
} |
|
| 251 |
|
|
| 252 |
cpb.done(); |
|
| 253 |
monitor.worked(50); |
|
| 254 |
|
|
| 255 |
// update corpus |
|
| 256 |
// update corpus indexes and edition |
|
| 257 |
// String txmhome = Toolbox.getTXMHOMEPATH(); |
|
| 258 |
// |
|
| 259 |
// BaseParameters params = corpus.getBase().getBaseParameters(); |
|
| 260 |
// params.getKeyValueParameters().put(ImportKeys.MULTITHREAD, "false"); //too soon |
|
| 261 |
// params.getKeyValueParameters().put(ImportKeys.DEBUG, Log.getLevel().intValue() < Level.WARNING.intValue()); // need debug for experimental stuff |
|
| 262 |
// params.getKeyValueParameters().put(ImportKeys.UPDATECORPUS, "true"); |
|
| 263 |
// |
|
| 264 |
// monitor.setTaskName("Updating corpus");
|
|
| 265 |
// File scriptDir = new File(txmhome, "scripts/import"); |
|
| 266 |
// File script = new File(scriptDir, "xtzLoader.groovy"); |
|
| 267 |
// System.out.println("Updating corpus "+corpus+" using "+params.paramFile);
|
|
| 268 |
// boolean ret = ExecuteImportScript.executeScript(script.getAbsolutePath(), params); |
|
| 269 |
// if (!ret) {
|
|
| 270 |
// System.out.println("Error during corpus re-import, check the XML-TXM files. Previous version can be restored from "+previousXMLTXMDirectory);
|
|
| 271 |
// return Status.CANCEL_STATUS; |
|
| 272 |
// } |
|
| 273 |
// Display.getDefault().syncExec(new Runnable() {
|
|
| 274 |
// @Override |
|
| 275 |
// public void run() {CloseEditorsUsing.corpus(corpus);}
|
|
| 276 |
// }); |
|
| 277 |
// monitor.worked(50); |
|
| 278 |
|
|
| 279 |
} catch (Exception e) {
|
|
| 280 |
e.printStackTrace(); |
|
| 281 |
return Status.CANCEL_STATUS; |
|
| 282 |
} |
|
| 283 |
System.out.println("Done.");
|
|
| 284 |
return Status.OK_STATUS; |
|
| 285 |
} |
|
| 286 |
}; |
|
| 287 |
job.schedule(); |
|
| 288 |
} |
|
| 289 |
} |
|
| 0 | 290 | |
| tmp/org.txm.treetagger.rcp/src/org/txm/treetagger/rcp/handlers/XMLLemmaProjection.java (revision 687) | ||
|---|---|---|
| 1 |
package org.txm.treetagger.rcp.handlers; |
|
| 2 |
|
|
| 3 |
import java.io.File; |
|
| 4 |
import java.io.IOException; |
|
| 5 |
import java.util.HashSet; |
|
| 6 |
import java.util.LinkedHashMap; |
|
| 7 |
import java.util.LinkedHashSet; |
|
| 8 |
import java.util.regex.Pattern; |
|
| 9 |
|
|
| 10 |
import javax.xml.stream.XMLStreamException; |
|
| 11 |
|
|
| 12 |
import org.txm.importer.StaxIdentityParser; |
|
| 13 |
|
|
| 14 |
public class XMLLemmaProjection extends StaxIdentityParser {
|
|
| 15 |
|
|
| 16 |
// Configuration of the projection; every field below is assigned by the constructor, except noMatchValues.
// Lookup rules, nested as:
// form -> pos -> source -> lemma |

| 17 |
protected LinkedHashMap<String, LinkedHashMap<String, LinkedHashMap<String, String>>> rules = null; |

| 18 |
// Set of pos values received from the caller. Judging by the name, the form itself is used as lemma for these pos - TODO confirm against the code reading this set.
protected HashSet<String> formAsLemmaPosList = null; |

| 19 |
// Name of the target (lemma) word property; the constructor adds a leading "#" when it is missing.
protected String lemmaProperty; |

| 20 |


| 21 |
// Starts empty. Presumably collects the values for which no rule matched - the code filling it is not in this part of the file.
protected HashSet<String> noMatchValues = new HashSet<String>(); |

| 22 |
// Name of the pos word property; the constructor adds a leading "#" when it is missing.
protected String posProperty; |

| 23 |
// Lemma sources received from the caller. NOTE(review): the name suggests iteration order is the priority order - confirm where it is read.
protected LinkedHashSet<String> lemmaSourcePriorityList; |
|
| 24 |
|
|
| 25 |
/**
 * Creates a lemma projection working on one XML-TXM file.
 *
 * @param infile the file handed to the parent parser constructor
 * @param rules lookup rules nested as form -> pos -> source -> lemma
 * @param formAsLemmaPosList set of pos values, stored as received
 * @param lemmaSourcePriorityList lemma sources, stored as received
 * @param posProperty name of the pos word property, with or without a leading "#"
 * @param lemmaProperty name of the lemma word property, with or without a leading "#"
 * @throws IOException propagated from the parent constructor
 * @throws XMLStreamException propagated from the parent constructor
 */
public XMLLemmaProjection(File infile,
		LinkedHashMap<String, LinkedHashMap<String, LinkedHashMap<String, String>>> rules,
		HashSet<String> formAsLemmaPosList,
		LinkedHashSet<String> lemmaSourcePriorityList,
		String posProperty,
		String lemmaProperty) throws IOException, XMLStreamException {
	super(infile);

	this.rules = rules;
	this.formAsLemmaPosList = formAsLemmaPosList;
	this.lemmaSourcePriorityList = lemmaSourcePriorityList;

	// In XML-TXM files the word property names start with "#" (they are references),
	// so the prefix is added here whenever the caller did not provide it.
	this.lemmaProperty = lemmaProperty.startsWith("#") ? lemmaProperty : "#" + lemmaProperty;
	this.posProperty = posProperty.startsWith("#") ? posProperty : "#" + posProperty;
}
|
| 41 |
|
|
| 42 |
// Parsing state, updated by processStartElement().
// Flags raised when a <w>, <ana> or <form> start tag is met (inForm is left to its default value, false).
boolean inW = false, inAna = false, inForm; |

| 43 |
// Per-word map, cleared at each <w>; seeded with the lemma property mapped to an empty value.
LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>(); |

| 44 |
// Per-word map of <ana> "type" -> "resp", cleared at each <w>; seeded with the lemma property mapped to "#txm_recode".
LinkedHashMap<String, String> anaResps = new LinkedHashMap<String, String>(); |

| 45 |
// "type" attribute of the last <ana> start tag read.
String typeName = null; |

| 46 |
// "resp" attribute of the last <ana> start tag read.
String respName = null; |

| 47 |
// Reset to "" when a <form> (formValue) or an <ana> (typeValue) starts; presumably they accumulate the element text afterwards - TODO confirm in the character handling code.
String formValue, typeValue = null; |
|
| 48 |
|
|
| 49 |
@Override |
|
| 50 |
public void processStartElement() throws XMLStreamException, IOException {
|
|
| 51 |
if (!inW) super.processStartElement(); // don't write W content |
|
| 52 |
|
|
| 53 |
if (localname.equals("w")) {
|
|
| 54 |
inW = true; |
|
| 55 |
anaValues.clear(); |
|
| 56 |
anaResps.clear(); |
|
| 57 |
|
|
| 58 |
//initialize the new type to a empty value in case there is transformation rule |
|
| 59 |
anaValues.put(lemmaProperty, ""); |
|
| 60 |
anaResps.put(lemmaProperty, "#txm_recode"); |
|
| 61 |
} else if (localname.equals("ana")) {
|
|
| 62 |
inAna = true; |
|
| 63 |
typeName = parser.getAttributeValue(null, "type"); |
|
| 64 |
respName = parser.getAttributeValue(null, "resp"); |
|
| 65 |
anaResps.put(typeName, respName); |
|
| 66 |
//if (typeName != null) typeName = typeName.substring(1); // remove # |
|
| 67 |
typeValue = ""; |
|
| 68 |
} else if (localname.equals("form")) {
|
|
| 69 |
inForm = true; |
|
| 70 |
formValue = ""; |
|
| 71 |
} |
|
Formats disponibles : Unified diff