Révision 687
tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/handlers/ReferencerToConc.java (revision 687) | ||
---|---|---|
40 | 40 |
import org.eclipse.ui.handlers.HandlerUtil; |
41 | 41 |
import org.txm.concordance.core.functions.Concordance; |
42 | 42 |
import org.txm.concordance.rcp.editors.ConcordanceEditor; |
43 |
import org.txm.rcp.RCPMessages; |
|
44 | 43 |
import org.txm.rcp.editors.TXMResultEditorInput; |
44 |
import org.txm.rcp.messages.TXMUIMessages; |
|
45 | 45 |
import org.txm.referencer.core.functions.Referencer; |
46 | 46 |
import org.txm.referencer.core.functions.Referencer.Line; |
47 | 47 |
import org.txm.referencer.rcp.editors.ReferencerEditor; |
tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/preferences/ReferencerPreferencePage.java (revision 687) | ||
---|---|---|
28 | 28 |
package org.txm.referencer.rcp.preferences; |
29 | 29 |
|
30 | 30 |
import org.eclipse.jface.preference.BooleanFieldEditor; |
31 |
import org.txm.rcp.RCPMessages; |
|
32 | 31 |
import org.txm.rcp.preferences.RCPPreferences; |
33 | 32 |
import org.txm.rcp.preferences.RCPPreferencesPage; |
33 |
import org.txm.referencer.rcp.messages.ReferencerUIMessages; |
|
34 | 34 |
|
35 | 35 |
/** |
36 | 36 |
* This class represents a preference page that is contributed to the |
... | ... | |
53 | 53 |
*/ |
54 | 54 |
public ReferencerPreferencePage() { |
55 | 55 |
super(); |
56 |
setTitle(RCPMessages.ReferencerPreferencePage_2);
|
|
56 |
setTitle(ReferencerUIMessages.ReferencerPreferencePage_2);
|
|
57 | 57 |
} |
58 | 58 |
|
59 | 59 |
/** |
... | ... | |
64 | 64 |
@Override |
65 | 65 |
public void createFieldEditors() { |
66 | 66 |
|
67 |
referencer_sorttype = new BooleanFieldEditor(RCPPreferences.SORTBYFREQ, |
|
68 |
RCPMessages.ReferencerPreferencePage_3, getFieldEditorParent()); |
|
67 |
referencer_sorttype = new BooleanFieldEditor(RCPPreferences.SORTBYFREQ, ReferencerUIMessages.ReferencerPreferencePage_3, getFieldEditorParent()); |
|
69 | 68 |
|
70 | 69 |
addField(referencer_sorttype); |
71 | 70 |
|
tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/messages/ReferencerUIMessages.java (revision 687) | ||
---|---|---|
1 |
|
|
2 |
package org.txm.referencer.rcp.messages; |
|
3 |
|
|
4 |
import org.eclipse.osgi.util.NLS; |
|
5 |
import org.txm.utils.messages.Utf8NLS; |
|
6 |
|
|
7 |
/**
 * Holder of the externalized UI strings of the referencer RCP plug-in.
 * <p>
 * Each {@code public static String} field below names one message key; matching keys
 * appear in the {@code messages_*.properties} files of the
 * {@code org.txm.referencer.rcp.messages} package. The fields are assigned by the
 * static initializer at class-load time, so they must stay non-final.
 * <p>
 * NOTE(review): loading goes through the project helper {@code Utf8NLS} rather than
 * {@code NLS.initializeMessages} directly; presumably so the properties files are read
 * as UTF-8 — confirm against {@code org.txm.utils.messages.Utf8NLS}.
 */
public class ReferencerUIMessages extends NLS { |
|
8 |
|
|
9 |
// Base name of the resource bundle (package path + "messages", without locale suffix or extension).
private static final String BUNDLE_NAME = "org.txm.referencer.rcp.messages.messages"; //$NON-NLS-1$ |
|
10 |
|
|
11 |
|
|
12 |
// Messages used by the referencer editor.
public static String ReferencerEditor_0; |
|
13 |
public static String ReferencerEditor_10; |
|
14 |
public static String ReferencerEditor_11; |
|
15 |
public static String ReferencerEditor_12; |
|
16 |
public static String ReferencerEditor_13; |
|
17 |
public static String ReferencerEditor_17; |
|
18 |
public static String ReferencerEditor_19; |
|
19 |
public static String ReferencerEditor_21; |
|
20 |
public static String ReferencerEditor_23; |
|
21 |
public static String ReferencerEditor_25; |
|
22 |
public static String ReferencerEditor_5; |
|
23 |
public static String ReferencerEditor_6; |
|
24 |
public static String ReferencerEditor_7; |
|
25 |
public static String ReferencerEditor_8; |
|
26 |
// Messages used by the referencer preference page (title and sort-by-frequency label).
public static String ReferencerPreferencePage_2; |
|
27 |
public static String ReferencerPreferencePage_3; |
|
28 |
|
|
29 |
|
|
30 |
static { |
|
31 |
// initialize resource bundle |
|
32 |
Utf8NLS.initializeMessages(BUNDLE_NAME, ReferencerUIMessages.class); |
|
33 |
} |
|
34 |
|
|
35 |
// Private constructor: the class only exposes static fields and is never instantiated.
private ReferencerUIMessages() { |
|
36 |
} |
|
37 |
} |
|
0 | 38 |
tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/messages/messages_ru.properties (revision 687) | ||
---|---|---|
1 |
|
|
2 |
ReferencerEditor_0 = Расчет ссылок <{0}> в {1} с приоритетом {2} по шаблону {3} |
|
3 |
ReferencerEditor_10 = Получение индексов |
|
4 |
ReferencerEditor_11 = Ссылки |
|
5 |
ReferencerEditor_12 = Группировка позиций по индексам |
|
6 |
ReferencerEditor_13 = Обновление интерфейса |
|
7 |
ReferencerEditor_17 = Показ первой страницы результатов |
|
8 |
ReferencerEditor_19 = Показ последней страницы результатов |
|
9 |
ReferencerEditor_21 = Показ следующей страницы результатов |
|
10 |
ReferencerEditor_23 = Показ предыдущей страницы результатов |
|
11 |
ReferencerEditor_25 = Открытие конкорданса |
|
12 |
ReferencerEditor_5 = Расчет ссылок {0} |
|
13 |
ReferencerEditor_6 = Шаблон ссылок |
|
14 |
ReferencerEditor_7 = Ни одного результата |
|
15 |
ReferencerEditor_8 = Получение употреблений |
|
16 |
|
|
17 |
ReferencerPreferencePage_2 = Ссылки |
|
18 |
ReferencerPreferencePage_3 = Сортировать ссылки по частотности |
|
0 | 19 |
tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/messages/messages_fr.properties (revision 687) | ||
---|---|---|
1 |
|
|
2 |
ReferencerEditor_0 = Calcul des références de <{0}> dans {1} avec la propriété {2} et le patron {3} |
|
3 |
ReferencerEditor_10 = Récupération des index |
|
4 |
ReferencerEditor_11 = Références |
|
5 |
ReferencerEditor_12 = Regroupement des positions par index |
|
6 |
ReferencerEditor_13 = Mise à jour de l'interface |
|
7 |
ReferencerEditor_17 = Affichage de la première page de résultats |
|
8 |
ReferencerEditor_19 = Affichage de la dernière page de résultats |
|
9 |
ReferencerEditor_21 = Affichage de la page de résultats suivante |
|
10 |
ReferencerEditor_23 = Affichage de la page de résultats précédente |
|
11 |
ReferencerEditor_25 = Ouverture de la concordance |
|
12 |
ReferencerEditor_5 = Calcul des références de {0} |
|
13 |
ReferencerEditor_6 = Patron des références |
|
14 |
ReferencerEditor_7 = Aucun résultat |
|
15 |
ReferencerEditor_8 = Récupération des occurrences |
|
16 |
|
|
17 |
ReferencerPreferencePage_2 = Références |
|
18 |
ReferencerPreferencePage_3 = Ordonner les références par fréquence |
|
0 | 19 |
tmp/org.txm.referencer.rcp/src/org/txm/referencer/rcp/editors/ReferencerEditor.java (revision 687) | ||
---|---|---|
31 | 31 |
import java.util.Collections; |
32 | 32 |
import java.util.List; |
33 | 33 |
|
34 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
35 |
import org.eclipse.jface.action.MenuManager; |
|
36 | 34 |
import org.eclipse.jface.viewers.TableViewer; |
37 | 35 |
import org.eclipse.osgi.util.NLS; |
38 | 36 |
import org.eclipse.swt.SWT; |
... | ... | |
50 | 48 |
import org.eclipse.swt.widgets.Button; |
51 | 49 |
import org.eclipse.swt.widgets.Composite; |
52 | 50 |
import org.eclipse.swt.widgets.Display; |
53 |
import org.eclipse.swt.widgets.Menu; |
|
54 | 51 |
import org.eclipse.swt.widgets.TableColumn; |
55 | 52 |
import org.eclipse.ui.IEditorInput; |
56 | 53 |
import org.eclipse.ui.IEditorSite; |
57 | 54 |
import org.eclipse.ui.PartInitException; |
55 |
import org.txm.core.messages.TXMCoreMessages; |
|
58 | 56 |
import org.txm.core.preferences.TXMPreferences; |
59 | 57 |
import org.txm.core.results.TXMResult; |
60 |
import org.txm.rcp.RCPMessages; |
|
61 | 58 |
import org.txm.rcp.StatusLine; |
62 | 59 |
import org.txm.rcp.editors.TXMEditor; |
63 | 60 |
import org.txm.rcp.editors.TXMResultEditorInput; |
64 | 61 |
import org.txm.rcp.editors.TableKeyListener; |
62 |
import org.txm.rcp.messages.TXMUIMessages; |
|
65 | 63 |
import org.txm.rcp.preferences.RCPPreferences; |
66 | 64 |
import org.txm.rcp.swt.widget.NavigationWidget; |
67 | 65 |
import org.txm.rcp.swt.widget.PropertiesSelector; |
... | ... | |
71 | 69 |
import org.txm.referencer.core.functions.Referencer; |
72 | 70 |
import org.txm.referencer.core.functions.Referencer.Line; |
73 | 71 |
import org.txm.referencer.rcp.handlers.ReferencerToConc; |
72 |
import org.txm.referencer.rcp.messages.ReferencerUIMessages; |
|
74 | 73 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
75 | 74 |
import org.txm.searchengine.cqp.corpus.Corpus; |
76 | 75 |
import org.txm.searchengine.cqp.corpus.Property; |
... | ... | |
185 | 184 |
//setPartName(referencer.getName()); //$NON-NLS-1$ |
186 | 185 |
|
187 | 186 |
viewer.getTable().setFocus(); |
188 |
System.out.println(NLS.bind(RCPMessages.CreatePartition_5, referencer.getNLines(), referencer.getV()));
|
|
187 |
System.out.println(NLS.bind(TXMUIMessages.CreatePartition_5, referencer.getNLines(), referencer.getV()));
|
|
189 | 188 |
// } |
190 | 189 |
// }); |
191 | 190 |
// |
... | ... | |
251 | 250 |
|
252 | 251 |
// [Search] |
253 | 252 |
Button go = new Button(paramArea, SWT.PUSH); |
254 |
go.setText(RCPMessages.SEARCH);
|
|
253 |
go.setText(TXMUIMessages.SEARCH);
|
|
255 | 254 |
go.setLayoutData(new GridData(GridData.FILL, GridData.FILL, |
256 | 255 |
false, false)); |
257 | 256 |
Font f = go.getFont(); |
... | ... | |
276 | 275 |
patternArea = new PropertiesSelector<StructuralUnitProperty>(paramArea, SWT.NONE); |
277 | 276 |
patternArea.setLayoutData(new GridData(GridData.FILL, GridData.FILL, false, false,3,1)); |
278 | 277 |
patternArea.setLayout(new GridLayout(3, false)); |
279 |
patternArea.setText(RCPMessages.ReferencerEditor_6);
|
|
278 |
patternArea.setText(ReferencerUIMessages.ReferencerEditor_6);
|
|
280 | 279 |
patternArea.addSelectionListener(new SelectionListener() { |
281 | 280 |
@Override |
282 | 281 |
public void widgetSelected(SelectionEvent e) { |
... | ... | |
299 | 298 |
|
300 | 299 |
@Override |
301 | 300 |
public void widgetSelected(SelectionEvent e) { |
302 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_17);
|
|
301 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_17);
|
|
303 | 302 |
fillDisplayArea(0, lineperpage); |
304 | 303 |
viewer.getTable().select(0); |
305 | 304 |
viewer.getTable().showSelection(); |
... | ... | |
313 | 312 |
|
314 | 313 |
@Override |
315 | 314 |
public void widgetSelected(SelectionEvent e) { |
316 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_19);
|
|
315 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_19);
|
|
317 | 316 |
fillDisplayArea(referencer.getNLines() |
318 | 317 |
- lineperpage, referencer.getNLines()); |
319 | 318 |
viewer.getTable().select(0); |
... | ... | |
328 | 327 |
|
329 | 328 |
@Override |
330 | 329 |
public void widgetSelected(SelectionEvent e) { |
331 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_21);
|
|
330 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_21);
|
|
332 | 331 |
fillDisplayArea(topLine + lineperpage, |
333 | 332 |
bottomLine + lineperpage); |
334 | 333 |
viewer.getTable().select(0); |
... | ... | |
343 | 342 |
|
344 | 343 |
@Override |
345 | 344 |
public void widgetSelected(SelectionEvent e) { |
346 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_23);
|
|
345 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_23);
|
|
347 | 346 |
fillDisplayArea(topLine - lineperpage, |
348 | 347 |
bottomLine - lineperpage); |
349 | 348 |
viewer.getTable().select(0); |
... | ... | |
384 | 383 |
.getSelection(), e.y); |
385 | 384 |
int col = getPointedColumn(mouseposition); |
386 | 385 |
if (col == 1) { |
387 |
StatusLine.setMessage(RCPMessages.ReferencerEditor_25);
|
|
386 |
StatusLine.setMessage(ReferencerUIMessages.ReferencerEditor_25);
|
|
388 | 387 |
sendSelectionToConc(); |
389 | 388 |
} else if (col == 2) { |
390 | 389 |
//System.out.println("Send to edition ?"); //$NON-NLS-1$ |
... | ... | |
397 | 396 |
nColumn.pack(); |
398 | 397 |
|
399 | 398 |
unitColumn = new TableColumn(viewer.getTable(), SWT.LEFT); |
400 |
unitColumn.setText(RCPMessages.ReferencerEditor_9);
|
|
401 |
unitColumn.setToolTipText(RCPMessages.ReferencerEditor_9);
|
|
399 |
unitColumn.setText(TXMCoreMessages.common_units);
|
|
400 |
unitColumn.setToolTipText(TXMCoreMessages.common_units);
|
|
402 | 401 |
unitColumn.setWidth(200); |
403 | 402 |
unitColumn.addSelectionListener(new SelectionListener() { |
404 | 403 |
@Override |
... | ... | |
412 | 411 |
}); |
413 | 412 |
|
414 | 413 |
freqColumn = new TableColumn(viewer.getTable(), SWT.LEFT); |
415 |
freqColumn.setText(RCPMessages.ReferencerEditor_11);
|
|
416 |
freqColumn.setToolTipText(RCPMessages.ReferencerEditor_11);
|
|
414 |
freqColumn.setText(ReferencerUIMessages.ReferencerEditor_11);
|
|
415 |
freqColumn.setToolTipText(ReferencerUIMessages.ReferencerEditor_11);
|
|
417 | 416 |
freqColumn.setWidth(100); |
418 | 417 |
freqColumn.addSelectionListener(new SelectionListener() { |
419 | 418 |
@Override |
tmp/org.txm.querycooccurrences.rcp/src/org/txm/rcp/commands/function/ComputeQueryCooccurrence.java (revision 687) | ||
---|---|---|
57 | 57 |
import org.eclipse.swt.widgets.Shell; |
58 | 58 |
import org.eclipse.swt.widgets.Spinner; |
59 | 59 |
import org.eclipse.ui.handlers.HandlerUtil; |
60 |
import org.txm.core.messages.TXMCoreMessages; |
|
60 | 61 |
import org.txm.functions.coocmatrix.QueryCooccurrence; |
61 | 62 |
//import org.txm.functions.queryindex.*; |
62 | 63 |
import org.txm.rcp.JobsTimer; |
63 |
import org.txm.rcp.RCPMessages;
|
|
64 |
import org.txm.rcp.messages.TXMUIMessages;
|
|
64 | 65 |
import org.txm.rcp.utils.JobHandler; |
65 | 66 |
import org.txm.rcp.views.QueriesView; |
66 | 67 |
import org.txm.rcp.views.corpora.CorporaView; |
... | ... | |
100 | 101 |
JobsTimer.start(); |
101 | 102 |
|
102 | 103 |
if (!(element instanceof Corpus)) { |
103 |
System.out.println(RCPMessages.ComputeCooccurrences_1+ element);
|
|
104 |
System.out.println(TXMUIMessages.ComputeCooccurrences_1+ element);
|
|
104 | 105 |
return Status.CANCEL_STATUS; |
105 | 106 |
} |
106 | 107 |
Corpus corpus = ((Corpus)element); |
... | ... | |
121 | 122 |
|
122 | 123 |
if (cooc == null) return null; |
123 | 124 |
|
124 |
System.out.println(RCPMessages.ComputeCoocMatrix_0+cooc.getSymbol());
|
|
125 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_0+cooc.getSymbol());
|
|
125 | 126 |
|
126 | 127 |
syncExec(new Runnable() { |
127 | 128 |
@Override |
... | ... | |
136 | 137 |
return Status.CANCEL_STATUS; |
137 | 138 |
} catch (Exception e) { |
138 | 139 |
Log.printStackTrace(e); |
139 |
try { System.out.println(RCPMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
140 |
try { System.out.println(TXMUIMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
140 | 141 |
} catch (Exception e1) { e1.printStackTrace(); } |
141 | 142 |
} finally { |
142 | 143 |
monitor.done(); |
... | ... | |
162 | 163 |
getQueriesAndNames(dialog.getQueryFile1(), queries1, names1); |
163 | 164 |
getQueriesAndNames(dialog.getQueryFile2(), queries2, names2); |
164 | 165 |
|
165 |
System.out.println(RCPMessages.ProgressionDialog_8+queries1);
|
|
166 |
System.out.println(RCPMessages.MergeCols_5+names1);
|
|
166 |
System.out.println(TXMCoreMessages.common_queries + queries1);
|
|
167 |
System.out.println(TXMUIMessages.MergeCols_5+names1);
|
|
167 | 168 |
|
168 | 169 |
QueryCooccurrence cooc = null; |
169 | 170 |
try { |
... | ... | |
171 | 172 |
System.out.println("Number of queries: "+nqueries); |
172 | 173 |
System.out.println("Estimated time: "+(nqueries*reqtime/1000)+" secs"); |
173 | 174 |
cooc = new QueryCooccurrence(corpus, queries1, names1, queries2, names2, dist, min, struct); |
174 |
System.out.println(RCPMessages.ComputeCoocMatrix_1+cooc.getNNodes());
|
|
175 |
System.out.println(RCPMessages.ComputeCoocMatrix_2+cooc.getNEdges());
|
|
176 |
System.out.println(RCPMessages.ComputeCoocMatrix_3+dist);
|
|
175 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_1+cooc.getNNodes());
|
|
176 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_2+cooc.getNEdges());
|
|
177 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_3+dist);
|
|
177 | 178 |
} catch (Exception e) { |
178 | 179 |
// TODO Auto-generated catch block |
179 | 180 |
Log.printStackTrace(e); |
180 |
try { System.out.println(RCPMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
181 |
try { System.out.println(TXMUIMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
181 | 182 |
} catch (Exception e1) { e1.printStackTrace(); } |
182 | 183 |
} |
183 | 184 |
return cooc; |
... | ... | |
239 | 240 |
@Override |
240 | 241 |
protected void configureShell(Shell newShell) { |
241 | 242 |
super.configureShell(newShell); |
242 |
newShell.setText(RCPMessages.ComputeCoocMatrix_7);
|
|
243 |
newShell.setText(TXMUIMessages.ComputeCoocMatrix_7);
|
|
243 | 244 |
} |
244 | 245 |
|
245 | 246 |
/* (non-Javadoc) |
... | ... | |
254 | 255 |
composite.setLayout(layout); |
255 | 256 |
|
256 | 257 |
Label anaPropLabel = new Label(composite, SWT.NONE); |
257 |
anaPropLabel.setText(RCPMessages.ParaBrowserEditor_5);
|
|
258 |
anaPropLabel.setText(TXMCoreMessages.common_structure);
|
|
258 | 259 |
anaPropLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, false, false)); |
259 | 260 |
|
260 | 261 |
propsCombo = new org.eclipse.swt.widgets.List(composite, SWT.READ_ONLY); |
... | ... | |
277 | 278 |
} |
278 | 279 |
|
279 | 280 |
Label distLabel = new Label(composite, SWT.NONE); |
280 |
distLabel.setText(RCPMessages.ComputeCoocMatrix_10);
|
|
281 |
distLabel.setText(TXMUIMessages.ComputeCoocMatrix_10);
|
|
281 | 282 |
distLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
282 | 283 |
false, false)); |
283 | 284 |
distSpinner = new Spinner(composite, SWT.BORDER); |
... | ... | |
286 | 287 |
distSpinner.setSelection(10); |
287 | 288 |
|
288 | 289 |
Label minLabel = new Label(composite, SWT.NONE); |
289 |
minLabel.setText(RCPMessages.ComputeCoocMatrix_11);
|
|
290 |
minLabel.setText(TXMUIMessages.ComputeCoocMatrix_11);
|
|
290 | 291 |
minLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
291 | 292 |
false, false)); |
292 | 293 |
minSpinner = new Spinner(composite, SWT.BORDER); |
tmp/org.txm.querycooccurrences.rcp/src/org/txm/rcp/commands/function/ComputeQueryAutoCooccurrence.java (revision 687) | ||
---|---|---|
53 | 53 |
import org.eclipse.swt.widgets.Shell; |
54 | 54 |
import org.eclipse.swt.widgets.Spinner; |
55 | 55 |
import org.eclipse.ui.handlers.HandlerUtil; |
56 |
import org.txm.core.messages.TXMCoreMessages; |
|
56 | 57 |
import org.txm.functions.coocmatrix.QueryAutoCooccurrence; |
57 | 58 |
import org.txm.functions.queryindex.QueryIndex; |
58 | 59 |
import org.txm.functions.queryindex.QueryIndexLine; |
59 | 60 |
import org.txm.rcp.JobsTimer; |
60 |
import org.txm.rcp.RCPMessages;
|
|
61 |
import org.txm.rcp.messages.TXMUIMessages;
|
|
61 | 62 |
import org.txm.rcp.utils.JobHandler; |
62 | 63 |
import org.txm.rcp.views.QueriesView; |
63 | 64 |
import org.txm.rcp.views.corpora.CorporaView; |
... | ... | |
98 | 99 |
JobsTimer.start(); |
99 | 100 |
|
100 | 101 |
if (!(element instanceof QueryIndex)) { |
101 |
System.out.println(RCPMessages.ComputeCooccurrences_1+ element);
|
|
102 |
System.out.println(TXMUIMessages.ComputeCooccurrences_1+ element);
|
|
102 | 103 |
return Status.CANCEL_STATUS; |
103 | 104 |
} |
104 | 105 |
QueryIndex qi = ((QueryIndex)element); |
... | ... | |
120 | 121 |
|
121 | 122 |
if (cooc == null) return null; |
122 | 123 |
|
123 |
System.out.println(RCPMessages.ComputeCoocMatrix_0+cooc.getSymbol());
|
|
124 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_0+cooc.getSymbol());
|
|
124 | 125 |
|
125 | 126 |
syncExec(new Runnable() { |
126 | 127 |
@Override |
... | ... | |
135 | 136 |
return Status.CANCEL_STATUS; |
136 | 137 |
} catch (Exception e) { |
137 | 138 |
Log.printStackTrace(e); |
138 |
try { System.out.println(RCPMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
139 |
try { System.out.println(TXMUIMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
139 | 140 |
} catch (Exception e1) { e1.printStackTrace(); } |
140 | 141 |
} finally { |
141 | 142 |
monitor.done(); |
... | ... | |
164 | 165 |
names.add(line.getName()); |
165 | 166 |
} |
166 | 167 |
|
167 |
System.out.println(RCPMessages.ProgressionDialog_8+queries);
|
|
168 |
System.out.println(RCPMessages.MergeCols_5+names);
|
|
168 |
System.out.println(TXMCoreMessages.common_queries+queries);
|
|
169 |
System.out.println(TXMUIMessages.MergeCols_5+names);
|
|
169 | 170 |
|
170 | 171 |
QueryAutoCooccurrence cooc = null; |
171 | 172 |
try { |
... | ... | |
175 | 176 |
System.out.println("Estimated time: "+(nqueries*reqtime/1000)+" secs"); |
176 | 177 |
cooc = new QueryAutoCooccurrence(corpus); |
177 | 178 |
cooc.setParameters(queries, names, dist, min, struct, oriented, new ConsoleProgressBar(nqueries)); |
178 |
System.out.println(RCPMessages.ComputeCoocMatrix_1+cooc.getNNodes());
|
|
179 |
System.out.println(RCPMessages.ComputeCoocMatrix_2+cooc.getNEdges());
|
|
180 |
System.out.println(RCPMessages.ComputeCoocMatrix_3+dist);
|
|
179 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_1+cooc.getNNodes());
|
|
180 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_2+cooc.getNEdges());
|
|
181 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_3+dist);
|
|
181 | 182 |
|
182 | 183 |
if (outfile != null) { |
183 | 184 |
cooc.toGraphml(outfile); |
184 |
System.out.println(RCPMessages.ComputeCoocMatrix_4+outfile);
|
|
185 |
System.out.println(TXMUIMessages.ComputeCoocMatrix_4+outfile);
|
|
185 | 186 |
} |
186 | 187 |
|
187 | 188 |
} catch (Exception e) { |
188 | 189 |
Log.printStackTrace(e); |
189 |
try { System.out.println(RCPMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
190 |
try { System.out.println(TXMUIMessages.LastCQPError+CQPEngine.getCqiClient().getLastCQPError());
|
|
190 | 191 |
} catch (Exception e1) { e1.printStackTrace(); } |
191 | 192 |
} |
192 | 193 |
return cooc; |
... | ... | |
226 | 227 |
@Override |
227 | 228 |
protected void configureShell(Shell newShell) { |
228 | 229 |
super.configureShell(newShell); |
229 |
newShell.setText(RCPMessages.ComputeCoocMatrix_7);
|
|
230 |
newShell.setText(TXMUIMessages.ComputeCoocMatrix_7);
|
|
230 | 231 |
} |
231 | 232 |
|
232 | 233 |
/* (non-Javadoc) |
... | ... | |
241 | 242 |
composite.setLayout(layout); |
242 | 243 |
|
243 | 244 |
Label anaPropLabel = new Label(composite, SWT.NONE); |
244 |
anaPropLabel.setText(RCPMessages.ParaBrowserEditor_5);
|
|
245 |
anaPropLabel.setText(TXMCoreMessages.common_structure);
|
|
245 | 246 |
anaPropLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, false, false)); |
246 | 247 |
|
247 | 248 |
propsCombo = new org.eclipse.swt.widgets.List(composite, SWT.READ_ONLY); |
... | ... | |
264 | 265 |
} |
265 | 266 |
|
266 | 267 |
Label distLabel = new Label(composite, SWT.NONE); |
267 |
distLabel.setText(RCPMessages.ComputeCoocMatrix_10);
|
|
268 |
distLabel.setText(TXMUIMessages.ComputeCoocMatrix_10);
|
|
268 | 269 |
distLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
269 | 270 |
false, false)); |
270 | 271 |
distSpinner = new Spinner(composite, SWT.BORDER); |
... | ... | |
273 | 274 |
distSpinner.setSelection(10); |
274 | 275 |
|
275 | 276 |
Label minLabel = new Label(composite, SWT.NONE); |
276 |
minLabel.setText(RCPMessages.ComputeCoocMatrix_11);
|
|
277 |
minLabel.setText(TXMUIMessages.ComputeCoocMatrix_11);
|
|
277 | 278 |
minLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
278 | 279 |
false, false)); |
279 | 280 |
minSpinner = new Spinner(composite, SWT.BORDER); |
... | ... | |
289 | 290 |
orientedButton.setSelection(false); |
290 | 291 |
|
291 | 292 |
outfileLabel = new Label(composite, SWT.NONE); |
292 |
outfileLabel.setText(RCPMessages.ComputeCoocMatrix_12);
|
|
293 |
outfileLabel.setText(TXMUIMessages.ComputeCoocMatrix_12);
|
|
293 | 294 |
outfileLabel.setLayoutData(new GridData(GridData.END, GridData.CENTER, |
294 | 295 |
false, false)); |
295 | 296 |
|
296 | 297 |
Button outfileButton = new Button(composite, SWT.PUSH); |
297 |
outfileButton.setText(RCPMessages.ComputeCoocMatrix_13);
|
|
298 |
outfileButton.setText(TXMUIMessages.ComputeCoocMatrix_13);
|
|
298 | 299 |
outfileButton.addSelectionListener(new SelectionListener() { |
299 | 300 |
@Override |
300 | 301 |
public void widgetSelected(SelectionEvent e) { |
tmp/org.txm.treetagger.rcp/plugin.xml (revision 687) | ||
---|---|---|
6 | 6 |
point="org.eclipse.ui.commands"> |
7 | 7 |
<command |
8 | 8 |
categoryId="org.txm.rcp.category.txm" |
9 |
defaultHandler="org.txm.treetagger.commands.Train"
|
|
9 |
defaultHandler="org.txm.treetagger.rcp.handlers.Train"
|
|
10 | 10 |
id="org.txm.treetagger.commands.Train" |
11 | 11 |
name="Train"> |
12 | 12 |
</command> |
13 | 13 |
<command |
14 | 14 |
categoryId="org.txm.rcp.category.txm" |
15 |
defaultHandler="org.txm.treetagger.commands.Apply"
|
|
15 |
defaultHandler="org.txm.treetagger.rcp.handlers.Apply"
|
|
16 | 16 |
id="org.txm.treetagger.commands.Apply" |
17 | 17 |
name="Apply"> |
18 | 18 |
</command> |
19 | 19 |
<command |
20 | 20 |
categoryId="org.txm.rcp.category.txm" |
21 |
defaultHandler="org.txm.treetagger.commands.LemmaProjection"
|
|
21 |
defaultHandler="org.txm.treetagger.rcp.handlers.LemmaProjection"
|
|
22 | 22 |
id="org.txm.treetagger.commands.PropertyProjection" |
23 | 23 |
name="LemmaProjection"> |
24 | 24 |
</command> |
25 | 25 |
<command |
26 | 26 |
categoryId="org.txm.rcp.category.txm" |
27 |
defaultHandler="org.txm.treetagger.commands.RemoveProperties"
|
|
28 |
description="Remove properties fomr corpus XML-TXM files"
|
|
27 |
defaultHandler="org.txm.treetagger.rcp.handlers.RemoveProperties"
|
|
28 |
description="Remove properties from corpus XML-TXM files"
|
|
29 | 29 |
id="org.txm.treetagger.commands.RemoveProperties" |
30 | 30 |
name="RemoveProperties"> |
31 | 31 |
</command> |
... | ... | |
85 | 85 |
</menu> |
86 | 86 |
</menuContribution> |
87 | 87 |
</extension> |
88 |
<extension |
|
89 |
point="org.eclipse.ui.preferencePages"> |
|
90 |
<page |
|
91 |
category="org.txm.rcp.preferences.NLPPreferencePage" |
|
92 |
class="org.txm.treetagger.rcp.preferences.TreeTaggerPreferencePage" |
|
93 |
id="org.txm.treetagger.rcp.preferences.TreeTaggerPreferencePage" |
|
94 |
name="%page.name.7"> |
|
95 |
</page> |
|
96 |
</extension> |
|
88 | 97 |
|
89 | 98 |
</plugin> |
tmp/org.txm.treetagger.rcp/META-INF/MANIFEST.MF (revision 687) | ||
---|---|---|
1 | 1 |
Manifest-Version: 1.0 |
2 | 2 |
Bundle-ManifestVersion: 2 |
3 | 3 |
Bundle-Name: TreeTagger |
4 |
Bundle-SymbolicName: TreeTagger;singleton:=true
|
|
4 |
Bundle-SymbolicName: org.txm.treetagger.rcp;singleton:=true
|
|
5 | 5 |
Bundle-Version: 1.0.0.qualifier |
6 |
Bundle-Activator: treetagger.Activator |
|
7 | 6 |
Require-Bundle: org.eclipse.ui, |
7 |
org.txm.treetagger.core;bundle-version="1.0.0", |
|
8 | 8 |
org.eclipse.core.runtime, |
9 | 9 |
org.eclipse.swt, |
10 | 10 |
org.txm.core;bundle-version="0.7.0", |
tmp/org.txm.treetagger.rcp/src/org/txm/treetagger/rcp/handlers/Train.java (revision 687) | ||
---|---|---|
1 |
package org.txm.treetagger.rcp.handlers; |
|
2 |
|
|
3 |
import java.io.BufferedOutputStream; |
|
4 |
import java.io.BufferedReader; |
|
5 |
import java.io.BufferedWriter; |
|
6 |
import java.io.File; |
|
7 |
import java.io.FileOutputStream; |
|
8 |
import java.io.OutputStreamWriter; |
|
9 |
import java.io.PrintStream; |
|
10 |
import java.io.PrintWriter; |
|
11 |
import java.util.ArrayList; |
|
12 |
import java.util.Arrays; |
|
13 |
import java.util.HashMap; |
|
14 |
import java.util.HashSet; |
|
15 |
import java.util.LinkedHashMap; |
|
16 |
import java.util.LinkedHashSet; |
|
17 |
import java.util.List; |
|
18 |
|
|
19 |
import org.eclipse.core.commands.AbstractHandler; |
|
20 |
import org.eclipse.core.commands.ExecutionEvent; |
|
21 |
import org.eclipse.core.commands.ExecutionException; |
|
22 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
23 |
import org.eclipse.core.runtime.IStatus; |
|
24 |
import org.eclipse.core.runtime.Status; |
|
25 |
import org.eclipse.jface.viewers.ISelection; |
|
26 |
import org.eclipse.jface.viewers.IStructuredSelection; |
|
27 |
import org.eclipse.ui.IWorkbenchWindow; |
|
28 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
29 |
import org.kohsuke.args4j.Option; |
|
30 |
import org.txm.core.preferences.TBXPreferences; |
|
31 |
import org.txm.core.preferences.TXMPreferences; |
|
32 |
import org.txm.index.core.functions.Index; |
|
33 |
import org.txm.index.core.functions.Line; |
|
34 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
35 |
import org.txm.rcp.utils.JobHandler; |
|
36 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
37 |
import org.txm.searchengine.cqp.CQPEngine; |
|
38 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
39 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
40 |
import org.txm.searchengine.cqp.corpus.Property; |
|
41 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
42 |
import org.txm.searchengine.cqp.corpus.query.Query; |
|
43 |
import org.txm.treetagger.core.preferences.TreeTaggerPreferences; |
|
44 |
import org.txm.utils.DeleteDir; |
|
45 |
import org.txm.utils.io.IOUtils; |
|
46 |
import org.txm.utils.logger.Log; |
|
47 |
import org.txm.utils.treetagger.TreeTagger; |
|
48 |
|
|
49 |
/** |
|
50 |
* Command handler that trains a TreeTagger model from the corpus selected in the workbench. |
|
51 |
* @see org.eclipse.core.commands.IHandler |
|
52 |
* @see org.eclipse.core.commands.AbstractHandler |
|
53 |
*/ |
|
54 |
public class Train extends AbstractHandler { |
|
55 |
|
|
56 |
public Corpus corpus = null; |
|
57 |
|
|
58 |
@Option(name="model", usage="The model file to create", widget="CreateFile", required=true, def="fr.par") |
|
59 |
public File model = null; |
|
60 |
@Option(name="posProperty", usage="The pos property", widget="String", required=true, def="frpos") |
|
61 |
public String posProperty = null; |
|
62 |
@Option(name="sentenceTag", usage="The pos property", widget="String", required=true, def="SENT") |
|
63 |
public String sentenceTag = null; |
|
64 |
@Option(name="lemmaProperty", usage="The lemma property", widget="String", required=true, def="frlemma") |
|
65 |
public String lemmaProperty = null; |
|
66 |
@Option(name="lexique", usage="Lexicon file", widget="File", required=true, def="lexicon.txt") |
|
67 |
public File lexique = null; |
|
68 |
@Option(name="options", usage="TreeTagger supplementary options", widget="String", required=true, def="") |
|
69 |
public String options = null; |
|
70 |
|
|
71 |
/** |
|
72 |
* Runs the Train command on the current selection of the active workbench page. |
* <p>
* When the selection is structured and its first element is a {@code Corpus}, the corpus is
* stored in the {@link #corpus} field, the parameters dialog is opened on this handler and,
* if the dialog returns {@code true}, {@code train(...)} is called with the dialog values.
*
* @param event the execution event, used to look up the active workbench window
* @return the selected corpus after {@code train(...)} has been called; {@code null} in every other case
* @throws ExecutionException declared by the handler contract; NOTE(review): presumably raised by
*         {@code HandlerUtil.getActiveWorkbenchWindowChecked} when no window is active — confirm
|
73 |
*/ |
|
74 |
public Object execute(ExecutionEvent event) throws ExecutionException { |
|
75 |
|
|
76 |
|
|
77 |
IWorkbenchWindow window = HandlerUtil.getActiveWorkbenchWindowChecked(event); |
|
78 |
|
|
79 |
ISelection isel = window.getActivePage().getSelection(); |
|
80 |
if (isel instanceof IStructuredSelection) { |
|
81 |
IStructuredSelection sel = (IStructuredSelection)isel; |
|
82 |
// Only the first selected element is considered.
Object first = sel.getFirstElement(); |
|
83 |
if (first instanceof Corpus) { |
|
84 |
corpus = (Corpus)first; |
|
85 |
// NOTE(review): presumably the dialog fills the @Option-annotated fields of this handler
// (model, posProperty, sentenceTag, lemmaProperty, lexique, options) and returns false on cancel — confirm.
if (ParametersDialog.open(this)) { |
|
86 |
|
|
87 |
// Properties are passed in the order {pos, lemma}; the options string is split on single spaces.
train(corpus, model, lexique, new String[]{posProperty, lemmaProperty}, sentenceTag, options.split(" ")); |
|
88 |
|
|
89 |
return corpus; |
|
90 |
} |
|
91 |
} |
|
92 |
} |
|
93 |
|
|
94 |
// Reached for any selection whose first element is not a Corpus, and also when
// ParametersDialog.open(this) returned false — the message is printed in both cases.
System.out.println("Wrong selection."); |
|
95 |
return null; |
|
96 |
} |
|
97 |
|
|
98 |
public static void train(final Corpus corpus, final File model, final File lexique, final String[] properties, final String sentenceTag, final String[] options) { |
|
99 |
|
|
100 |
JobHandler job = new JobHandler("Applying TreeTagger to "+corpus+" corpus.") { |
|
101 |
@Override |
|
102 |
protected IStatus run(IProgressMonitor monitor) { |
|
103 |
this.runInit(monitor); |
|
104 |
try { |
|
105 |
File lexique2 = lexique; |
|
106 |
MainCorpus mainCorpus = corpus.getMainCorpus(); |
|
107 |
File corpusBinaryDirectory = mainCorpus.getBaseDirectory(); |
|
108 |
|
|
109 |
System.out.println("TRAIN : "+corpus+" with "+lexique2+" to create "+model+ " with properties "+Arrays.toString(properties)); |
|
110 |
|
|
111 |
if (properties == null || properties.length != 2) { |
|
112 |
System.out.println("Error can't continue with selected word properties: "+Arrays.toString(properties)); |
|
113 |
return Status.CANCEL_STATUS; |
|
114 |
} |
|
115 |
|
|
116 |
for (String p : properties) { |
|
117 |
Property prop = corpus.getProperty(p); |
|
118 |
if (prop == null) { |
|
119 |
System.out.println("Missing property in corpus: "+p); |
|
120 |
return Status.CANCEL_STATUS; |
|
121 |
} |
|
122 |
} |
|
123 |
|
|
124 |
Property pos = corpus.getProperty(properties[0]); |
|
125 |
Property lemma = corpus.getProperty(properties[1]); |
|
126 |
|
|
127 |
// Prepare temporary directory |
|
128 |
File treetaggerSrcDirectory = new File(mainCorpus.getBaseDirectory(), "treetagger"); |
|
129 |
DeleteDir.deleteDirectory(treetaggerSrcDirectory); |
|
130 |
treetaggerSrcDirectory.mkdirs(); |
|
131 |
|
|
132 |
HashMap<String, HashSet<String>> simplified_lexicon = null; |
|
133 |
HashMap<String, HashSet<String>> simplified_lexicon_errors = null; |
|
134 |
int error_counter = 0; |
|
135 |
// Create Lexicon file from an Index |
|
136 |
if (lexique2 == null || !lexique2.exists()) { |
|
137 |
System.out.println("Warning: no lexicon file or given lexicon file does not exist ("+lexique2+"). Using corpus Index..."); |
|
138 |
|
|
139 |
File lexiconfile = new File(treetaggerSrcDirectory, "lexicon.txt"); |
|
140 |
List<Property> corpusProperties = new ArrayList<Property>(); |
|
141 |
corpusProperties.add(mainCorpus.getProperty("word")); |
|
142 |
for (String p : properties) { |
|
143 |
Property prop = mainCorpus.getProperty(p); |
|
144 |
if (prop == null) { |
|
145 |
System.out.println("Error, a property is missing: "+p); |
|
146 |
return Status.CANCEL_STATUS; |
|
147 |
} |
|
148 |
corpusProperties.add(prop); |
|
149 |
} |
|
150 |
Index index = new Index(mainCorpus); |
|
151 |
index.setParameters(new Query("[]"), corpusProperties, null, null, null, null); |
|
152 |
index.compute(monitor); |
|
153 |
List<Line> lines = index.getAllLines(); |
|
154 |
LinkedHashMap<String, ArrayList<String>> lex = new LinkedHashMap<String, ArrayList<String>>(); |
|
155 |
HashMap<String, HashSet<String>> allPosValues = new HashMap<String, HashSet<String>>(); |
|
156 |
for (Line l : lines) { |
|
157 |
List<List<String>> values = l.getUnitsProperties(); |
|
158 |
String form = values.get(0).get(0); |
|
159 |
if (!lex.containsKey(form)) { |
|
160 |
ArrayList<String> pairs = new ArrayList<String>(); |
|
161 |
HashSet<String> posValues = new HashSet<String>(); |
|
162 |
|
|
163 |
allPosValues.put(form, posValues); |
|
164 |
lex.put(form, pairs); |
|
165 |
} |
|
166 |
ArrayList<String> pairs = lex.get(form); |
|
167 |
HashSet<String> posValues = allPosValues.get(form); |
|
168 |
String posValue = values.get(1).get(0); |
|
169 |
String lemmaValue = values.get(2).get(0); |
|
170 |
if (posValues.contains(posValue)) { |
|
171 |
|
|
172 |
} else { |
|
173 |
posValues.add(posValue); |
|
174 |
pairs.add(posValue); |
|
175 |
pairs.add(lemmaValue); |
|
176 |
} |
|
177 |
} |
|
178 |
|
|
179 |
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(lexiconfile), "UTF-8")); |
|
180 |
for (String form : lex.keySet()) { |
|
181 |
|
|
182 |
writer.write(form); |
|
183 |
boolean tab = true; |
|
184 |
for (String v : lex.get(form)) { |
|
185 |
if (tab) writer.write("\t"+v); |
|
186 |
else writer.write(" "+v); |
|
187 |
|
|
188 |
tab = !tab; |
|
189 |
} |
|
190 |
writer.write("\n"); |
|
191 |
} |
|
192 |
writer.close(); |
|
193 |
lexique2 = lexiconfile; |
|
194 |
} else { // diagnose lexicon content |
|
195 |
simplified_lexicon = new HashMap<String, HashSet<String>>(); |
|
196 |
simplified_lexicon_errors = new HashMap<String, HashSet<String>>(); |
|
197 |
BufferedReader reader = IOUtils.getReader(lexique2); |
|
198 |
String line = reader.readLine(); |
|
199 |
while (line != null) { |
|
200 |
String[] split = line.split("\t", 2); |
|
201 |
HashSet<String> posValues = new HashSet<String>(); |
|
202 |
simplified_lexicon.put(split[0], posValues); |
|
203 |
for (String poslemme : split[1].split("\t")) { |
|
204 |
String[] split2 = poslemme.split(" ", 2); |
|
205 |
posValues.add(split2[0]); |
|
206 |
} |
|
207 |
line = reader.readLine(); |
|
208 |
} |
|
209 |
reader.close(); |
|
210 |
} |
|
211 |
|
|
212 |
|
|
213 |
// create TT SRC file from CWB indexes |
|
214 |
|
|
215 |
File ttSrcFile = new File(treetaggerSrcDirectory, mainCorpus.getName()+".tt"); |
|
216 |
System.out.println("TT SRC file: "+ttSrcFile.getAbsolutePath()); |
|
217 |
BufferedOutputStream fos = new BufferedOutputStream(new FileOutputStream(ttSrcFile)); |
|
218 |
PrintStream ps = new PrintStream(fos); |
|
219 |
LinkedHashSet<Integer> positions = new LinkedHashSet<Integer>(); |
|
220 |
Property word = corpus.getProperty("word"); |
|
221 |
AbstractCqiClient CQI = CQPEngine.getCqiClient(); |
|
222 |
for (Match m : corpus.getMatches()) { |
|
223 |
for (int i = m.getStart() ; i <= m.getEnd() ; i++) { // end match must be included |
|
224 |
positions.add(i); |
|
225 |
|
|
226 |
if (positions.size() >= 1000) { // avoid too big array |
|
227 |
int[] positions_array = new int[positions.size()]; |
|
228 |
int ip = 0; |
|
229 |
for (int p : positions) positions_array[ip++] = p; |
|
230 |
String[] words = CQI.cpos2Str(word.getQualifiedName(), positions_array); |
|
231 |
String[] values = CQI.cpos2Str(pos.getQualifiedName(), positions_array); |
|
232 |
|
|
233 |
for (int iW = 0 ; iW < words.length ; iW++) { |
|
234 |
String w = words[iW]; |
|
235 |
if (w != null) { |
|
236 |
String s = w+"\t"+values[iW]; |
|
237 |
ps.println(s); |
|
238 |
|
|
239 |
if (simplified_lexicon != null) { // check given lexicon |
|
240 |
if (simplified_lexicon.containsKey(w)) { |
|
241 |
if (!simplified_lexicon.get(w).contains(values[iW])) { |
|
242 |
//System.out.println("Lexicon error: cannot find pos="+values[iW]+" for form="+w); |
|
243 |
if (!simplified_lexicon_errors.containsKey(w)) simplified_lexicon_errors.put(w, new HashSet<String>()); |
|
244 |
HashSet<String> error_values = simplified_lexicon_errors.get(w); |
|
245 |
error_values.add(values[iW]); |
|
246 |
error_counter++; |
|
247 |
} |
|
248 |
} else { |
|
249 |
//System.out.println("Lexicon error: cannot find form="+w); |
|
250 |
if (!simplified_lexicon_errors.containsKey(w)) simplified_lexicon_errors.put(w, new HashSet<String>()); |
|
251 |
HashSet<String> error_values = simplified_lexicon_errors.get(w); |
|
252 |
error_values.add("#"+values[iW]); |
|
253 |
error_counter++; |
|
254 |
} |
|
255 |
} |
|
256 |
} |
|
257 |
} |
|
258 |
positions.clear(); |
|
259 |
} |
|
260 |
} |
|
261 |
} |
|
262 |
if (positions.size() > 0) { // write last words |
|
263 |
int[] positions_array = new int[positions.size()]; |
|
264 |
int ip = 0; |
|
265 |
for (int p : positions) positions_array[ip++] = p; |
|
266 |
String[] words = CQI.cpos2Str(word.getQualifiedName(), positions_array); |
|
267 |
String[] values = CQI.cpos2Str(pos.getQualifiedName(), positions_array); |
|
268 |
|
|
269 |
for (int iW = 0 ; iW < words.length ; iW++) { |
|
270 |
String w = words[iW]; |
|
271 |
if (w != null) { |
|
272 |
String s = w+"\t"+values[iW]; |
|
273 |
ps.println(s); |
|
274 |
} |
|
275 |
} |
|
276 |
positions.clear(); |
|
277 |
} |
|
278 |
ps.close(); |
|
279 |
|
|
280 |
if (simplified_lexicon_errors != null && simplified_lexicon_errors.size() > 0) { |
|
281 |
File error_file = new File(treetaggerSrcDirectory, "errors.txt"); |
|
282 |
PrintWriter errorwriter = IOUtils.getWriter(error_file); |
|
283 |
int c = 0; |
|
284 |
System.out.println("Warning, lexicon errors ("+error_counter+") found with words:"); |
|
285 |
for (String w : simplified_lexicon_errors.keySet()) { |
|
286 |
errorwriter.println(w+"="+simplified_lexicon_errors.get(w)); |
|
287 |
if (c < 10) { |
|
288 |
System.out.println(w+"="+simplified_lexicon_errors.get(w)); |
|
289 |
c++; |
|
290 |
if (c == 10) System.out.println("... errors display is trucated, see "+error_file.getAbsolutePath()); |
|
291 |
} |
|
292 |
} |
|
293 |
errorwriter.close(); |
|
294 |
//System.out.println("Cannot apply train-treetagger if lexicon is missing words and pos."); |
|
295 |
//return Status.CANCEL_STATUS; |
|
296 |
File lexique3 = new File(lexique2.getParentFile(), lexique2.getName()+".fix"); |
|
297 |
BufferedReader reader = IOUtils.getReader(lexique2); |
|
298 |
PrintWriter writer = IOUtils.getWriter(lexique3); |
|
299 |
String line = reader.readLine(); |
|
300 |
while (line != null) { |
|
301 |
String w = line.split("\t", 2)[0]; |
|
302 |
|
|
303 |
if (simplified_lexicon_errors.containsKey(w)) { |
|
304 |
for (String p : simplified_lexicon_errors.get(w)) { |
|
305 |
if (!p.startsWith("#")) |
|
306 |
line += ("\t"+p+" <no_lemma>"); // append missing value |
|
307 |
} |
|
308 |
simplified_lexicon_errors.remove(w); |
|
309 |
} |
|
310 |
|
|
311 |
writer.println(line); |
|
312 |
line = reader.readLine(); |
|
313 |
} |
|
314 |
|
|
315 |
// write missing words |
|
316 |
for (String w2 : simplified_lexicon_errors.keySet()) { |
|
317 |
writer.print(w2); |
|
318 |
for (String p : simplified_lexicon_errors.get(w2)) { |
|
319 |
writer.print("\t"+p+" <no_lemma>"); |
|
320 |
} |
|
321 |
writer.println(""); |
|
322 |
} |
|
323 |
|
|
324 |
reader.close(); |
|
325 |
writer.close(); |
|
326 |
System.out.println("Adding words to a temporary lexicon: "+lexique3); |
|
327 |
lexique2 = lexique3; |
|
328 |
} |
|
329 |
|
|
330 |
// Create open class file : contains all pos values |
|
331 |
File openclassfile = new File(treetaggerSrcDirectory, "openclasses.txt"); |
|
332 |
PrintWriter openClassFileWriter = IOUtils.getWriter(openclassfile); |
|
333 |
|
|
334 |
// Lexicon poslexicon = corpus.getLexicon(pos); |
|
335 |
// String[] posValues = poslexicon.getForms(); |
|
336 |
// for (int iV = 0 ; iV < posValues.length ; iV++) { |
|
337 |
// if (iV == 0) openClassFileWriter.print(posValues[iV]); |
|
338 |
// else openClassFileWriter.print(" "+posValues[iV]); |
|
339 |
// } |
|
340 |
openClassFileWriter.close(); |
|
341 |
|
|
342 |
// Call treetagger-train |
|
343 |
if (ttSrcFile.exists() && lexique2.exists() && openclassfile.exists()) { |
|
344 |
System.out.println("Running "); |
|
345 |
String treetaggerBinDirectory = new File(TXMPreferences.getString(TreeTaggerPreferences.INSTALL_PATH, TBXPreferences.PREFERENCES_NODE), "bin").getAbsolutePath(); |
|
346 |
if (!treetaggerBinDirectory.endsWith("/")) treetaggerBinDirectory += "/"; |
|
347 |
|
|
348 |
TreeTagger tt = new TreeTagger(treetaggerBinDirectory, options); |
|
349 |
tt.settoken(); |
|
350 |
tt.setquiet(); |
|
351 |
tt.setlemma(); |
|
352 |
tt.setsgml(); |
|
353 |
tt.setst(sentenceTag); |
|
354 |
tt.setproto(); |
|
355 |
tt.setutf8(); |
|
356 |
tt.debug(true); |
|
357 |
tt.traintreetagger(lexique2.getAbsolutePath(), openclassfile.getAbsolutePath(), ttSrcFile.getAbsolutePath(), model.getAbsolutePath()); |
|
358 |
|
|
359 |
System.out.println("Done: "+model.getAbsolutePath()); |
|
360 |
} else { |
|
361 |
System.out.println("Aborting."); |
|
362 |
} |
|
363 |
|
|
364 |
return Status.OK_STATUS; |
|
365 |
} catch (Exception e) { |
|
366 |
System.out.println("Error while training TT: "+e); |
|
367 |
Log.printStackTrace(e); |
|
368 |
} |
|
369 |
return Status.CANCEL_STATUS; |
|
370 |
} |
|
371 |
}; |
|
372 |
job.schedule(); |
|
373 |
} |
|
374 |
} |
|
0 | 375 |
tmp/org.txm.treetagger.rcp/src/org/txm/treetagger/rcp/handlers/Apply.java (revision 687) | ||
---|---|---|
1 |
package org.txm.treetagger.rcp.handlers; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.FileFilter; |
|
5 |
import java.util.Arrays; |
|
6 |
import java.util.HashMap; |
|
7 |
|
|
8 |
import org.eclipse.core.commands.AbstractHandler; |
|
9 |
import org.eclipse.core.commands.ExecutionEvent; |
|
10 |
import org.eclipse.core.commands.ExecutionException; |
|
11 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
12 |
import org.eclipse.core.runtime.IStatus; |
|
13 |
import org.eclipse.core.runtime.Status; |
|
14 |
import org.eclipse.jface.viewers.ISelection; |
|
15 |
import org.eclipse.jface.viewers.IStructuredSelection; |
|
16 |
import org.eclipse.ui.IWorkbenchWindow; |
|
17 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
18 |
import org.kohsuke.args4j.Option; |
|
19 |
import org.txm.rcp.commands.workspace.UpdateCorpus; |
|
20 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
21 |
import org.txm.rcp.utils.JobHandler; |
|
22 |
import org.txm.scripts.teitxm.Annotate; |
|
23 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
24 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
25 |
|
|
26 |
/** |
|
27 |
* Our sample handler extends AbstractHandler, an IHandler base class. |
|
28 |
* @see org.eclipse.core.commands.IHandler |
|
29 |
* @see org.eclipse.core.commands.AbstractHandler |
|
30 |
*/ |
|
31 |
public class Apply extends AbstractHandler { |
|
32 |
|
|
33 |
@Option(name="model", usage="Model file", widget="File", required=true, def="model.par") |
|
34 |
public File model = null; |
|
35 |
@Option(name="posProperty", usage="The pos property", widget="String", required=true, def="frpos") |
|
36 |
public String posProperty = null; |
|
37 |
@Option(name="lemmaProperty", usage="The lemma property", widget="String", required=true, def="frlemma") |
|
38 |
public String lemmaProperty = null; |
|
39 |
@Option(name="options", usage="TreeTagger supplementary options", widget="String", required=true, def="") |
|
40 |
public String options = null; |
|
41 |
|
|
42 |
/** |
|
43 |
* |
|
44 |
*/ |
|
45 |
public Object execute(ExecutionEvent event) throws ExecutionException { |
|
46 |
Corpus corpus = null; |
|
47 |
IWorkbenchWindow window = HandlerUtil.getActiveWorkbenchWindowChecked(event); |
|
48 |
|
|
49 |
ISelection isel = window.getActivePage().getSelection(); |
|
50 |
if (isel instanceof IStructuredSelection) { |
|
51 |
IStructuredSelection sel = (IStructuredSelection)isel; |
|
52 |
Object first = sel.getFirstElement(); |
|
53 |
if (first instanceof Corpus) { |
|
54 |
corpus = (Corpus)first; |
|
55 |
if (ParametersDialog.open(this)) { |
|
56 |
apply(corpus, model, new String[]{posProperty, lemmaProperty}, options.split(" ")); |
|
57 |
return corpus; |
|
58 |
} |
|
59 |
} |
|
60 |
} |
|
61 |
|
|
62 |
System.out.println("Wrong selection."); |
|
63 |
return null; |
|
64 |
} |
|
65 |
|
|
66 |
public static void apply(Corpus corpus, final File model, final String[] properties, final String[] options) { |
|
67 |
final MainCorpus mainCorpus = corpus.getMainCorpus(); |
|
68 |
final File corpusBinaryDirectory = mainCorpus.getBaseDirectory(); |
|
69 |
final File txmDirectory = new File(corpusBinaryDirectory, "txm/"+mainCorpus.getName()); |
|
70 |
|
|
71 |
if (!txmDirectory.exists()) { |
|
72 |
System.out.println("Can't apply TreeTagger to a corpus with no XML-TXM files."); |
|
73 |
} |
|
74 |
|
|
75 |
final File[] files = txmDirectory.listFiles(new FileFilter() { |
|
76 |
@Override |
|
77 |
public boolean accept(File file) { |
|
78 |
return file.isFile() && file.canWrite() && file.getName().endsWith(".xml"); |
|
79 |
} |
|
80 |
}); |
|
81 |
|
|
82 |
if (files == null || files.length == 0) { |
|
83 |
System.out.println("Can't apply TreeTagger to a corpus with no XML-TXM files in "+txmDirectory); |
|
84 |
} |
|
85 |
|
|
86 |
String lang = model.getName(); |
|
87 |
if (!lang.endsWith(".par")) { |
|
88 |
System.out.println("Model file name must ends with the '.par' extension"); |
|
89 |
return; |
|
90 |
} |
|
91 |
lang = lang.substring(0, lang.indexOf(".par")); |
|
92 |
|
|
93 |
final HashMap<String, String> hash = new HashMap<String, String>(); |
|
94 |
for (File txmFile : files) { |
|
95 |
hash.put(txmFile.getName(), lang); |
|
96 |
} |
|
97 |
|
|
98 |
for (int i = 0 ; i < properties.length ; i++) properties[i] = properties[i].trim(); |
|
99 |
|
|
100 |
System.out.println("APPLY : "+model+" to "+corpus+" updating "+Arrays.toString(properties)+ " with options "+Arrays.toString(options)); |
|
101 |
JobHandler job = new JobHandler("Applying TreeTagger to "+corpus+" corpus.") { |
|
102 |
@Override |
|
103 |
protected IStatus run(IProgressMonitor monitor) { |
|
104 |
this.runInit(monitor); |
|
105 |
Annotate annotator = new Annotate(); |
|
106 |
annotator.setModelsDirectory(model.getParentFile()); |
|
107 |
annotator.setDebug(); |
|
108 |
if (!annotator.run(corpusBinaryDirectory, txmDirectory, hash, true, properties, options)) { |
|
109 |
System.out.println("Fail to apply TreeTagger with "+txmDirectory+" files."); |
|
110 |
return Status.CANCEL_STATUS; |
|
111 |
} |
|
112 |
System.out.println("Done. Updating corpus..."); |
|
113 |
|
|
114 |
if (UpdateCorpus.update(mainCorpus) == null) { |
|
115 |
System.out.println("Fail to update corpus indexes and editions."); |
|
116 |
} |
|
117 |
System.out.println("Done."); |
|
118 |
return Status.OK_STATUS;//frppos |
|
119 |
} |
|
120 |
}; |
|
121 |
job.schedule(); |
|
122 |
} |
|
123 |
} |
|
0 | 124 |
tmp/org.txm.treetagger.rcp/src/org/txm/treetagger/rcp/handlers/LemmaProjection.java (revision 687) | ||
---|---|---|
1 |
package org.txm.treetagger.rcp.handlers; |
|
2 |
|
|
3 |
import java.io.BufferedReader; |
|
4 |
import java.io.File; |
|
5 |
import java.io.FileFilter; |
|
6 |
import java.io.PrintWriter; |
|
7 |
import java.util.Arrays; |
|
8 |
import java.util.Collections; |
|
9 |
import java.util.HashSet; |
|
10 |
import java.util.LinkedHashMap; |
|
11 |
import java.util.LinkedHashSet; |
|
12 |
|
|
13 |
import org.apache.commons.lang.StringUtils; |
|
14 |
import org.eclipse.core.commands.AbstractHandler; |
|
15 |
import org.eclipse.core.commands.ExecutionEvent; |
|
16 |
import org.eclipse.core.commands.ExecutionException; |
|
17 |
import org.eclipse.core.runtime.IProgressMonitor; |
|
18 |
import org.eclipse.core.runtime.IStatus; |
|
19 |
import org.eclipse.core.runtime.Status; |
|
20 |
import org.eclipse.jface.viewers.ISelection; |
|
21 |
import org.eclipse.jface.viewers.IStructuredSelection; |
|
22 |
import org.eclipse.ui.IWorkbenchWindow; |
|
23 |
import org.eclipse.ui.handlers.HandlerUtil; |
|
24 |
import org.kohsuke.args4j.Option; |
|
25 |
import org.txm.Toolbox; |
|
26 |
import org.txm.core.preferences.TBXPreferences; |
|
27 |
import org.txm.core.preferences.TXMPreferences; |
|
28 |
import org.txm.rcp.swt.widget.parameters.ParametersDialog; |
|
29 |
import org.txm.rcp.utils.JobHandler; |
|
30 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
|
31 |
import org.txm.searchengine.cqp.corpus.Corpus; |
|
32 |
import org.txm.searchengine.cqp.corpus.MainCorpus; |
|
33 |
import org.txm.searchengine.cqp.corpus.Property; |
|
34 |
import org.txm.stat.utils.ConsoleProgressBar; |
|
35 |
import org.txm.utils.DeleteDir; |
|
36 |
import org.txm.utils.io.FileCopy; |
|
37 |
import org.txm.utils.io.IOUtils; |
|
38 |
import org.txm.utils.Tuple; |
|
39 |
|
|
40 |
/** |
|
41 |
* Our sample handler extends AbstractHandler, an IHandler base class. |
|
42 |
* @see org.eclipse.core.commands.IHandler |
|
43 |
* @see org.eclipse.core.commands.AbstractHandler |
|
44 |
*/ |
|
45 |
public class LemmaProjection extends AbstractHandler { |
|
46 |
|
|
47 |
protected static final String EXTRA = "extra"; |
|
48 |
@Option(name="dictionary", usage="TSV Dictionary file with form, msd, lemma, source columns", widget="File", required=true, def="frolex.tsv") |
|
49 |
public File dictionary = null; |
|
50 |
@Option(name="extrarules", usage="form+pos rules files", widget="File", required=false, def="extrarules.tsv") |
|
51 |
public File extrarules = null; |
|
52 |
@Option(name="posproperty", usage="The lexicon property to read", widget="String", required=true, def="frpos") |
|
53 |
public String posproperty = null; |
|
54 |
@Option(name="lemmaproperty", usage="The property to create/update in the corpus", widget="String", required=true, def="plemma") |
|
55 |
public String lemmaproperty = null; |
|
56 |
@Option(name="formAsLemmaPosList", usage="Pos values lemma exceptions", widget="String", required=false, def="NOMPro") |
|
57 |
public String formAsLemmaPosList = null; |
|
58 |
@Option(name="sourcePriorityList", usage="The property to create/update in the corpus", widget="String", required=true, def="TL") |
|
59 |
public String sourcePriorityList = null; |
|
60 |
|
|
61 |
/** |
|
62 |
* |
|
63 |
*/ |
|
64 |
public Object execute(ExecutionEvent event) throws ExecutionException { |
|
65 |
Corpus corpus = null; |
|
66 |
IWorkbenchWindow window = HandlerUtil.getActiveWorkbenchWindowChecked(event); |
|
67 |
|
|
68 |
ISelection isel = window.getActivePage().getSelection(); |
|
69 |
if (isel instanceof IStructuredSelection) { |
|
70 |
IStructuredSelection sel = (IStructuredSelection)isel; |
|
71 |
Object first = sel.getFirstElement(); |
|
72 |
if (first instanceof Corpus) { |
|
73 |
corpus = (Corpus)first; |
|
74 |
if (ParametersDialog.open(this)) { |
|
75 |
LinkedHashSet<String> formAsLemmaPosSet = new LinkedHashSet<String>(); |
|
76 |
formAsLemmaPosSet.addAll(Arrays.asList(formAsLemmaPosList.split(","))); |
|
77 |
LinkedHashSet<String> sourcePrioritySet = new LinkedHashSet<String>(); |
|
78 |
if (extrarules != null && extrarules.exists()) sourcePrioritySet.add(EXTRA); // extra must be the first source |
|
79 |
sourcePrioritySet.addAll(Arrays.asList(sourcePriorityList.split(","))); |
|
80 |
|
|
81 |
System.out.println("formAsLemmaPosSet="+formAsLemmaPosSet); |
|
82 |
System.out.println("sourcePrioritySet="+sourcePrioritySet); |
|
83 |
apply(corpus, dictionary, extrarules, posproperty, lemmaproperty, formAsLemmaPosSet, sourcePrioritySet); |
|
84 |
return corpus; |
|
85 |
} |
|
86 |
} |
|
87 |
} |
|
88 |
|
|
89 |
System.out.println("Wrong selection."); |
|
90 |
return null; |
|
91 |
} |
|
92 |
|
|
93 |
public static void apply(final Corpus corpus, final File dictionary, final File extrarules, final String posproperty, |
|
94 |
final String targetproperty, final LinkedHashSet<String> formAsLemmaPosList, final LinkedHashSet<String> sourceprioritylist) { |
|
95 |
final MainCorpus mainCorpus = corpus.getMainCorpus(); |
|
96 |
final File corpusBinaryDirectory = mainCorpus.getBaseDirectory(); |
|
97 |
final File txmDirectory = new File(corpusBinaryDirectory, "txm/"+mainCorpus.getName()); |
|
98 |
|
|
99 |
if (!txmDirectory.exists()) { |
|
100 |
System.out.println("Can't process a corpus with no XML-TXM files directory: "+txmDirectory); |
|
101 |
return; |
|
102 |
} |
|
103 |
|
|
104 |
final File[] files = txmDirectory.listFiles(new FileFilter() { |
|
105 |
@Override |
|
106 |
public boolean accept(File file) { |
|
107 |
return file.isFile() && file.canWrite() && file.getName().endsWith(".xml"); |
|
108 |
} |
|
109 |
}); |
|
110 |
|
|
111 |
Property pos = null; |
|
112 |
try { |
|
113 |
pos = mainCorpus.getProperty(posproperty); |
|
114 |
} catch (CqiClientException e1) { |
|
115 |
// TODO Auto-generated catch block |
|
116 |
e1.printStackTrace(); |
|
117 |
} |
|
118 |
if (pos == null) { |
|
119 |
System.out.println("No pos property found with name="+posproperty); |
|
120 |
return; |
|
121 |
} |
|
122 |
|
|
123 |
if (files == null || files.length == 0) { |
|
124 |
System.out.println("Can't process a corpus with no XML-TXM files in "+txmDirectory); |
|
125 |
return; |
|
126 |
} |
|
127 |
|
|
128 |
System.out.println("APPLYING : "+dictionary+" to "+mainCorpus+": creating/updating "+targetproperty+ " property with lexicon "+dictionary); |
|
129 |
JobHandler job = new JobHandler("Creating/Updating "+targetproperty+" property.") { |
|
130 |
@Override |
|
131 |
protected IStatus run(IProgressMonitor monitor) { |
|
132 |
this.runInit(monitor); |
|
133 |
Tuple t; |
|
134 |
LinkedHashMap<String, LinkedHashMap<String, LinkedHashMap<String, String>>> rules = new LinkedHashMap<String, LinkedHashMap<String, LinkedHashMap<String, String>>>(); |
|
135 |
HashSet<String> formAsLemmaPosSet = new HashSet<String>(); |
|
136 |
try { |
|
137 |
// load rules |
|
138 |
BufferedReader reader = IOUtils.getReader(dictionary); |
|
139 |
String line = reader.readLine(); |
|
140 |
while (line != null) { |
|
141 |
String[] splitTab = line.split("\t"); |
|
142 |
if (splitTab.length != 5) { |
|
143 |
System.out.println("Error in dictionary files with line='"+line+"': length is not 5. Found: "+Arrays.toString(splitTab)); |
|
144 |
line = reader.readLine(); |
|
145 |
reader.close(); |
|
146 |
return Status.CANCEL_STATUS; |
|
147 |
} |
|
148 |
String form = splitTab[0]; |
|
149 |
String pos = splitTab[1];//.replace("<no_pos>|", "").replace("|<no_pos>|", "").replace("|<no_pos>", ""); |
|
150 |
String lemma = splitTab[2];//.replace("<no_lemma>|", "").replace("|<no_lemma>|", "").replace("|<no_lemma>", ""); |
|
151 |
String source = splitTab[3]; |
|
152 |
|
|
153 |
if (! rules.containsKey(form)) rules.put(form, new LinkedHashMap<String, LinkedHashMap<String, String>>()); |
|
154 |
LinkedHashMap<String, LinkedHashMap<String, String>> posHash = rules.get(form); |
|
155 |
|
|
156 |
if (!lemma.equals("<no_lemma>")) { |
|
157 |
if (!posHash.containsKey(pos)) posHash.put(pos, new LinkedHashMap<String, String>()); |
|
158 |
LinkedHashMap<String, String> sourceHash = posHash.get(pos); |
|
159 |
|
|
160 |
sourceHash.put(source, lemma); |
|
161 |
} |
|
162 |
line = reader.readLine(); |
|
163 |
} |
|
164 |
reader.close(); |
|
165 |
System.out.println("Dictionary rules loaded: "+rules.size()); |
|
166 |
|
|
167 |
if (extrarules.exists()) { |
|
168 |
reader = IOUtils.getReader(extrarules); |
|
169 |
line = reader.readLine(); |
|
170 |
while (line != null) { |
|
171 |
String[] splitTab = line.split("\t"); |
|
172 |
if (splitTab.length != 3) { |
|
173 |
System.out.println("Error in extra rule files with line='"+line+"': length is not 3."); |
|
174 |
line = reader.readLine(); |
|
175 |
continue; |
|
176 |
} |
|
177 |
String form = splitTab[0]; |
|
178 |
String pos = splitTab[1]; |
|
179 |
String lemma = splitTab[2]; |
|
180 |
if (! rules.containsKey(form)) rules.put(form, new LinkedHashMap<String, LinkedHashMap<String, String>>()); |
|
181 |
LinkedHashMap<String, LinkedHashMap<String, String>> posHash = rules.get(form); |
|
182 |
|
|
183 |
if (!posHash.containsKey(pos)) posHash.put(pos, new LinkedHashMap<String, String>()); |
|
184 |
LinkedHashMap<String, String> sourceHash = posHash.get(pos); |
|
185 |
sourceHash.put(EXTRA, lemma); |
|
186 |
|
|
187 |
} |
|
188 |
reader.close(); |
|
189 |
System.out.println("Dictionary extra rules loaded: "+rules.size()); |
|
190 |
} else { |
|
191 |
System.out.println("No extra rule loaded."); |
|
192 |
} |
|
193 |
|
|
194 |
PrintWriter writer = IOUtils.getWriter("/tmp/rules.txt"); |
|
195 |
for (String k : rules.keySet()) { |
|
196 |
writer.println("FORM="+k); |
|
197 |
LinkedHashMap<String, LinkedHashMap<String, String>> rules2 = rules.get(k); |
|
198 |
for (String k2 : rules2.keySet()) { |
|
199 |
writer.println(" POS="+k2); |
|
200 |
LinkedHashMap<String, String> rules3 = rules2.get(k2); |
|
201 |
for (String k3 : rules3.keySet()) { |
|
202 |
writer.println(" SOURCE="+k3); |
|
203 |
String ls2 = rules3.get(k3); |
|
204 |
writer.println(" LEMMA="+ls2); |
|
205 |
} |
|
206 |
} |
|
207 |
} |
|
208 |
writer.close(); |
|
209 |
System.out.println("RULE DUMP: /tmp/rules.txt"); |
|
210 |
|
|
211 |
// load rules |
|
212 |
for (String s : formAsLemmaPosList) { |
|
213 |
formAsLemmaPosSet.add(s); |
|
214 |
} |
|
215 |
System.out.println("POS exception rules loaded: "+formAsLemmaPosSet.size()); |
|
216 |
|
|
217 |
// save previous version of XML-TXM files |
|
218 |
File previousXMLTXMDirectory = new File(txmDirectory.getAbsolutePath()+"_previous"); |
|
219 |
DeleteDir.deleteDirectory(previousXMLTXMDirectory); |
|
220 |
FileCopy.copyFiles(txmDirectory, previousXMLTXMDirectory); |
|
221 |
|
|
222 |
// work |
|
223 |
File noMatchsFile = new File(TXMPreferences.getString(TBXPreferences.USER_TXM_HOME, TBXPreferences.PREFERENCES_NODE), "results/nomatch.txt"); |
|
224 |
HashSet<String> noMatchsSet = new HashSet<String>(); |
|
225 |
ConsoleProgressBar cpb = new ConsoleProgressBar(files.length); |
|
226 |
for (File xmlFile : files) { |
|
227 |
cpb.tick(); |
|
228 |
XMLLemmaProjection p = new XMLLemmaProjection(xmlFile, rules, formAsLemmaPosSet, sourceprioritylist, posproperty, targetproperty); |
|
229 |
File tmpFile = new File(xmlFile.getParentFile(), "tmp_"+xmlFile.getName()); |
|
230 |
if (p.process(tmpFile)) { |
|
231 |
if (xmlFile.delete() && tmpFile.renameTo(xmlFile)) { |
|
232 |
// ok |
|
233 |
} else { |
|
234 |
System.out.println("Error during lemma projection: can't replace XML-TXM file: "+xmlFile); |
|
235 |
return Status.CANCEL_STATUS; |
|
236 |
} |
|
237 |
} else { |
|
238 |
System.out.println("Error during lemma projection. Aborting."); |
|
239 |
return Status.CANCEL_STATUS; |
|
240 |
} |
|
241 |
if (p.getNoMatchValues().size() > 0) { |
|
242 |
System.out.println("No matchs found with file "+xmlFile.getName()+": "+p.getNoMatchValues()); |
|
243 |
noMatchsSet.addAll(p.getNoMatchValues()); |
|
244 |
} |
|
245 |
} |
|
246 |
|
|
247 |
if (noMatchsSet.size() > 0) { |
|
248 |
System.out.println("Missing lemma values report saved in: "+noMatchsFile); |
|
249 |
IOUtils.write(noMatchsFile, StringUtils.join(noMatchsSet, "\n")); |
|
250 |
} |
|
251 |
|
|
252 |
cpb.done(); |
|
253 |
monitor.worked(50); |
|
254 |
|
|
255 |
// update corpus |
|
256 |
// update corpus indexes and edition |
|
257 |
// String txmhome = Toolbox.getTXMHOMEPATH(); |
|
258 |
// |
|
259 |
// BaseParameters params = corpus.getBase().getBaseParameters(); |
|
260 |
// params.getKeyValueParameters().put(ImportKeys.MULTITHREAD, "false"); //too soon |
|
261 |
// params.getKeyValueParameters().put(ImportKeys.DEBUG, Log.getLevel().intValue() < Level.WARNING.intValue()); // need debug for experimental stuff |
|
262 |
// params.getKeyValueParameters().put(ImportKeys.UPDATECORPUS, "true"); |
|
263 |
// |
|
264 |
// monitor.setTaskName("Updating corpus"); |
|
265 |
// File scriptDir = new File(txmhome, "scripts/import"); |
|
266 |
// File script = new File(scriptDir, "xtzLoader.groovy"); |
|
267 |
// System.out.println("Updating corpus "+corpus+" using "+params.paramFile); |
|
268 |
// boolean ret = ExecuteImportScript.executeScript(script.getAbsolutePath(), params); |
|
269 |
// if (!ret) { |
|
270 |
// System.out.println("Error during corpus re-import, check the XML-TXM files. Previous version can be restored from "+previousXMLTXMDirectory); |
|
271 |
// return Status.CANCEL_STATUS; |
|
272 |
// } |
|
273 |
// Display.getDefault().syncExec(new Runnable() { |
|
274 |
// @Override |
|
275 |
// public void run() {CloseEditorsUsing.corpus(corpus);} |
|
276 |
// }); |
|
277 |
// monitor.worked(50); |
|
278 |
|
|
279 |
} catch (Exception e) { |
|
280 |
e.printStackTrace(); |
|
281 |
return Status.CANCEL_STATUS; |
|
282 |
} |
|
283 |
System.out.println("Done."); |
|
284 |
return Status.OK_STATUS; |
|
285 |
} |
|
286 |
}; |
|
287 |
job.schedule(); |
|
288 |
} |
|
289 |
} |
|
0 | 290 |
tmp/org.txm.treetagger.rcp/src/org/txm/treetagger/rcp/handlers/XMLLemmaProjection.java (revision 687) | ||
---|---|---|
1 |
package org.txm.treetagger.rcp.handlers; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.io.IOException; |
|
5 |
import java.util.HashSet; |
|
6 |
import java.util.LinkedHashMap; |
|
7 |
import java.util.LinkedHashSet; |
|
8 |
import java.util.regex.Pattern; |
|
9 |
|
|
10 |
import javax.xml.stream.XMLStreamException; |
|
11 |
|
|
12 |
import org.txm.importer.StaxIdentityParser; |
|
13 |
|
|
14 |
public class XMLLemmaProjection extends StaxIdentityParser { |
|
15 |
|
|
16 |
// form -> pos -> source -> lemma |
|
17 |
protected LinkedHashMap<String, LinkedHashMap<String, LinkedHashMap<String, String>>> rules = null; |
|
18 |
protected HashSet<String> formAsLemmaPosList = null; |
|
19 |
protected String lemmaProperty; |
|
20 |
|
|
21 |
protected HashSet<String> noMatchValues = new HashSet<String>(); |
|
22 |
protected String posProperty; |
|
23 |
protected LinkedHashSet<String> lemmaSourcePriorityList; |
|
24 |
|
|
25 |
/**
 * Creates the parser of an XML-TXM file.
 *
 * @param infile the file given to the StaxIdentityParser constructor
 * @param rules the form -> pos -> source -> lemma rules
 * @param formAsLemmaPosList the pos values stored as lemma exceptions
 * @param lemmaSourcePriorityList the source names stored as the source priority list
 * @param posProperty the pos property name, stored with a "#" prefix
 * @param lemmaProperty the lemma property name, stored with a "#" prefix
 */
public XMLLemmaProjection(
		File infile,
		LinkedHashMap<String, LinkedHashMap<String, LinkedHashMap<String, String>>> rules,
		HashSet<String> formAsLemmaPosList,
		LinkedHashSet<String> lemmaSourcePriorityList,
		String posProperty,
		String lemmaProperty) throws IOException, XMLStreamException {
	super(infile);

	this.rules = rules;
	this.formAsLemmaPosList = formAsLemmaPosList;
	this.lemmaSourcePriorityList = lemmaSourcePriorityList;

	// in the XML-TXM files the word property names start with "#" (they are references):
	// the prefix is added when the given name does not have it
	this.lemmaProperty = lemmaProperty.startsWith("#") ? lemmaProperty : "#"+lemmaProperty;
	this.posProperty = posProperty.startsWith("#") ? posProperty : "#"+posProperty;
}
|
41 |
|
|
42 |
boolean inW = false, inAna = false, inForm; |
|
43 |
LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>(); |
|
44 |
LinkedHashMap<String, String> anaResps = new LinkedHashMap<String, String>(); |
|
45 |
String typeName = null; |
|
46 |
String respName = null; |
|
47 |
String formValue, typeValue = null; |
|
48 |
|
|
49 |
@Override |
|
50 |
public void processStartElement() throws XMLStreamException, IOException { |
|
51 |
if (!inW) super.processStartElement(); // don't write W content |
|
52 |
|
|
53 |
if (localname.equals("w")) { |
|
54 |
inW = true; |
|
55 |
anaValues.clear(); |
|
56 |
anaResps.clear(); |
|
57 |
|
|
58 |
//initialize the new type to a empty value in case there is transformation rule |
|
59 |
anaValues.put(lemmaProperty, ""); |
|
60 |
anaResps.put(lemmaProperty, "#txm_recode"); |
|
61 |
} else if (localname.equals("ana")) { |
|
62 |
inAna = true; |
|
63 |
typeName = parser.getAttributeValue(null, "type"); |
|
64 |
respName = parser.getAttributeValue(null, "resp"); |
|
65 |
anaResps.put(typeName, respName); |
|
66 |
//if (typeName != null) typeName = typeName.substring(1); // remove # |
|
67 |
typeValue = ""; |
|
68 |
} else if (localname.equals("form")) { |
|
69 |
inForm = true; |
|
70 |
formValue = ""; |
|
71 |
} |
Formats disponibles : Unified diff