Révision 3537
TXM/trunk/org.txm.treesearch.rcp/src/org/txm/treesearch/command/ComputeTreeSearch.java (revision 3537) | ||
---|---|---|
146 | 146 |
} |
147 | 147 |
|
148 | 148 |
public static TreeSearch getTreeSearchFor(CQPCorpus corpus) { |
149 |
TreeSearchSelector s = getSelectorForCorpus(corpus, TreeSearchPreferences.getInstance().getString(TreeSearchPreferences.DEFAULT_REPRESENTATION));
|
|
149 |
TreeSearchSelector s = getSelectorForCorpus(corpus, TreeSearchPreferences.getInstance().getString(TreeSearchPreferences.DEFAULT_VISUALISATION));
|
|
150 | 150 |
if (s != null) return s.getTreeSearch(corpus); |
151 | 151 |
|
152 | 152 |
return null; |
... | ... | |
182 | 182 |
|
183 | 183 |
if (parent instanceof CQPCorpus) { |
184 | 184 |
CQPCorpus corpus = (CQPCorpus)parent; |
185 |
TreeSearchSelector s = getSelectorForCorpus(corpus, TreeSearchPreferences.getInstance().getString(TreeSearchPreferences.DEFAULT_REPRESENTATION));
|
|
185 |
TreeSearchSelector s = getSelectorForCorpus(corpus, TreeSearchPreferences.getInstance().getString(TreeSearchPreferences.DEFAULT_VISUALISATION));
|
|
186 | 186 |
ts = s.getTreeSearch((String)obj, corpus); |
187 | 187 |
} |
188 | 188 |
} |
TXM/trunk/org.txm.treesearch.rcp/src/org/txm/treesearch/preferences/SyntacticAnnotationPreferencePage.java (revision 3537) | ||
---|---|---|
1 |
package org.txm.treesearch.preferences; |
|
2 |
|
|
3 |
import org.eclipse.ui.IWorkbench; |
|
4 |
import org.txm.rcp.IImageKeys; |
|
5 |
import org.txm.rcp.preferences.TXMPreferencePage; |
|
6 |
import org.txm.rcp.preferences.TXMPreferenceStore; |
|
7 |
import org.txm.treesearch.rcp.Messages; |
|
8 |
|
|
9 |
/** |
|
10 |
* Syntactic annotations preferences page |
|
11 |
* |
|
12 |
* @author mdecorde |
|
13 |
* |
|
14 |
*/ |
|
15 |
public class SyntacticAnnotationPreferencePage extends TXMPreferencePage { |
|
16 |
|
|
17 |
@Override |
|
18 |
public void createFieldEditors() { |
|
19 |
//this.addField(new StringFieldEditor(TreeSearchPreferences.DEFAULT_VISUALISATION, Messages.SyntacticVisualisation, this.getFieldEditorParent())); |
|
20 |
} |
|
21 |
|
|
22 |
/* |
|
23 |
* (non-Javadoc) |
|
24 |
* |
|
25 |
* @see |
|
26 |
* org.eclipse.ui.IWorkbenchPreferencePage#init(org.eclipse.ui.IWorkbench) |
|
27 |
*/ |
|
28 |
@Override |
|
29 |
public void init(IWorkbench workbench) { |
|
30 |
this.setPreferenceStore(new TXMPreferenceStore(TreeSearchPreferences.getInstance().getPreferencesNodeQualifier())); |
|
31 |
this.setDescription(Messages.SyntacticAnnotation); |
|
32 |
this.setImageDescriptor(IImageKeys.getImageDescriptor(this.getClass(), "icons/functions/Tree.png")); //$NON-NLS-1$ |
|
33 |
} |
|
34 |
} |
|
0 | 35 |
TXM/trunk/org.txm.treesearch.rcp/src/org/txm/treesearch/preferences/TreeSearchPreferencePage.java (revision 3537) | ||
---|---|---|
8 | 8 |
import org.txm.treesearch.rcp.Messages; |
9 | 9 |
|
10 | 10 |
/** |
11 |
* Syntatic annotations preferences page
|
|
11 |
* TreeSearch command preferences page
|
|
12 | 12 |
* |
13 | 13 |
* @author mdecorde |
14 | 14 |
* |
... | ... | |
17 | 17 |
|
18 | 18 |
@Override |
19 | 19 |
public void createFieldEditors() { |
20 |
this.addField(new StringFieldEditor(TreeSearchPreferences.DEFAULT_REPRESENTATION, Messages.SyntacticRepresentation, this.getFieldEditorParent())); |
|
20 |
|
|
21 |
this.addField(new StringFieldEditor(TreeSearchPreferences.DEFAULT_VISUALISATION, Messages.DefaultSyntacticVisualisation, this.getFieldEditorParent())); |
|
21 | 22 |
} |
22 | 23 |
|
23 | 24 |
/* |
... | ... | |
28 | 29 |
*/ |
29 | 30 |
@Override |
30 | 31 |
public void init(IWorkbench workbench) { |
32 |
|
|
31 | 33 |
this.setPreferenceStore(new TXMPreferenceStore(TreeSearchPreferences.getInstance().getPreferencesNodeQualifier())); |
32 |
this.setDescription(Messages.SyntacticAnnotation); |
|
33 | 34 |
this.setImageDescriptor(IImageKeys.getImageDescriptor(this.getClass(), "icons/functions/Tree.png")); //$NON-NLS-1$ |
34 | 35 |
} |
35 | 36 |
} |
TXM/trunk/org.txm.treesearch.rcp/src/org/txm/treesearch/rcp/messages.properties (revision 3537) | ||
---|---|---|
10 | 10 |
ResultIsNotAConcordanceP0=Result is not a concordance: {0} |
11 | 11 |
SelectionIsNotACorpusP0=Selection is not a corpus: {0} |
12 | 12 |
SelectionIsNotALineP0=Selection is not a Line: {0} |
13 |
DefaultSyntacticVisualisation=Default Syntactic annotation |
|
13 | 14 |
SyntacticAnnotation=Syntactic annotation |
14 |
SyntacticRepresentation=Syntactic representation |
|
15 |
SyntacticTree=Syntactic tree |
|
16 |
SyntacticVisualisation=Syntactic representation |
|
15 | 17 |
SyntaxRepresentationP0=Syntax representations: {0} |
TXM/trunk/org.txm.treesearch.rcp/src/org/txm/treesearch/rcp/Messages.java (revision 3537) | ||
---|---|---|
30 | 30 |
|
31 | 31 |
public static String SelectionIsNotALineP0; |
32 | 32 |
public static String SyntacticAnnotation; |
33 |
public static String SyntacticTree; |
|
33 | 34 |
|
34 |
public static String SyntacticRepresentation; |
|
35 |
public static String DefaultSyntacticVisualisation; |
|
36 |
public static String SyntacticVisualisation; |
|
35 | 37 |
|
36 | 38 |
public static String SyntaxRepresentationP0; |
37 | 39 |
static { |
TXM/trunk/org.txm.treesearch.rcp/src/org/txm/treesearch/editor/TreeSearchEditor.java (revision 3537) | ||
---|---|---|
29 | 29 |
import org.txm.rcp.IImageKeys; |
30 | 30 |
import org.txm.rcp.editors.TXMEditor; |
31 | 31 |
import org.txm.rcp.editors.TXMResultEditorInput; |
32 |
import org.txm.rcp.messages.TXMUIMessages; |
|
32 | 33 |
import org.txm.rcp.swt.GLComposite; |
33 | 34 |
import org.txm.rcp.swt.widget.LargeQueryField; |
34 | 35 |
import org.txm.rcp.views.QueriesView; |
... | ... | |
176 | 177 |
Composite queryPanel = this.getExtendedParametersGroup(); |
177 | 178 |
|
178 | 179 |
// fill query Area |
179 |
GridLayout qlayout = new GridLayout(13, false); |
|
180 |
GridLayout qlayout = new GridLayout(1, false); |
|
181 |
qlayout.horizontalSpacing = 0; |
|
182 |
// qlayout.verticalSpacing = 0; |
|
180 | 183 |
queryPanel.setLayout(qlayout); |
181 | 184 |
|
182 |
queryArea = new LargeQueryField(queryPanel, SWT.BORDER | SWT.V_SCROLL | SWT.H_SCROLL, currentSelector.getQueryClass()); |
|
185 |
|
|
186 |
// first line |
|
187 |
GLComposite line1 = new GLComposite(queryPanel, SWT.NONE, "line1"); |
|
188 |
line1.getLayout().numColumns = 13; |
|
189 |
line1.getLayout().horizontalSpacing = 5; |
|
190 |
line1.setLayoutData(new GridData(GridData.FILL, GridData.FILL, true, true)); |
|
191 |
|
|
192 |
Label l = new Label(line1, SWT.NONE); |
|
193 |
l.setText(TXMUIMessages.query); //$NON-NLS-1$ |
|
194 |
l.setLayoutData(new GridData(GridData.CENTER, GridData.CENTER, false, false)); |
|
195 |
|
|
196 |
queryArea = new LargeQueryField(line1, SWT.BORDER | SWT.V_SCROLL | SWT.H_SCROLL, currentSelector.getQueryClass()); |
|
183 | 197 |
GridData queryAreaLayoutData = new GridData(GridData.FILL, GridData.FILL, true, true); |
184 |
queryAreaLayoutData.horizontalSpan = 13;
|
|
198 |
queryAreaLayoutData.horizontalSpan = 12;
|
|
185 | 199 |
queryAreaLayoutData.heightHint = 80; |
186 | 200 |
queryAreaLayoutData.minimumHeight = 80; |
187 | 201 |
queryArea.setLayoutData(queryAreaLayoutData); |
... | ... | |
199 | 213 |
} |
200 | 214 |
}); |
201 | 215 |
|
202 |
new Label(queryPanel, SWT.NONE).setText("T "); //$NON-NLS-1$ |
|
203 |
TCombo = new Combo(queryPanel, SWT.READ_ONLY); |
|
216 |
// second line |
|
217 |
GLComposite line2 = new GLComposite(queryPanel, SWT.NONE, "line2"); |
|
218 |
line2.getLayout().numColumns = 4; |
|
219 |
line2.getLayout().horizontalSpacing = 5; |
|
220 |
line2.setLayoutData(new GridData(GridData.FILL, GridData.FILL, true, true)); |
|
221 |
|
|
222 |
Label l1 = new Label(line2, SWT.NONE); |
|
223 |
l1.setText("T "); //$NON-NLS-1$ |
|
224 |
l1.setLayoutData(new GridData(SWT.CENTER, SWT.CENTER, false, false)); |
|
225 |
|
|
226 |
TCombo = new Combo(line2, SWT.READ_ONLY); |
|
204 | 227 |
GridData gdata = new GridData(SWT.FILL, SWT.CENTER, true, true); |
205 | 228 |
TCombo.setLayoutData(gdata); |
206 | 229 |
TCombo.addSelectionListener(selChangedListener); |
207 | 230 |
|
208 |
new Label(queryPanel, SWT.NONE).setText("NT "); //$NON-NLS-1$ |
|
209 |
NTCombo = new Combo(queryPanel, SWT.READ_ONLY); |
|
231 |
Label l2 = new Label(line2, SWT.NONE); |
|
232 |
l2.setText("NT "); //$NON-NLS-1$ |
|
233 |
l2.setLayoutData(new GridData(SWT.CENTER, SWT.CENTER, false, false)); |
|
234 |
|
|
235 |
NTCombo = new Combo(line2, SWT.READ_ONLY); |
|
210 | 236 |
gdata = new GridData(SWT.FILL, SWT.CENTER, true, true); |
211 | 237 |
NTCombo.setLayoutData(gdata); |
212 | 238 |
NTCombo.addSelectionListener(selChangedListener); |
213 | 239 |
|
240 |
|
|
241 |
// bottom toolbar |
|
214 | 242 |
GLComposite navigationAreaComposite = getBottomToolbar().installGLComposite(ConcordanceUIMessages.navigation, 10, false); |
243 |
navigationAreaComposite.getLayout().verticalSpacing = 5; |
|
244 |
navigationAreaComposite.getLayout().horizontalSpacing = 5; |
|
215 | 245 |
navigationAreaComposite.setLayoutData(new GridData(SWT.FILL, SWT.FILL, true, false)); |
216 | 246 |
|
217 | 247 |
// fill param Area |
218 |
gdata = new GridData(SWT.FILL, SWT.CENTER, true, true); |
|
219 |
new Label(navigationAreaComposite, SWT.NONE).setText("Sent "); //$NON-NLS-1$ |
|
248 |
Label l3 = new Label(navigationAreaComposite, SWT.NONE); |
|
249 |
l3.setText("Match "); //$NON-NLS-1$ |
|
250 |
gdata = new GridData(SWT.FILL, SWT.CENTER, false, false); |
|
251 |
l3.setLayoutData(gdata); |
|
252 |
|
|
220 | 253 |
sentSpinner = new Spinner(navigationAreaComposite, SWT.BORDER); |
221 | 254 |
sentSpinner.setMinimum(1); |
222 | 255 |
sentSpinner.setIncrement(1); |
223 | 256 |
sentSpinner.setMaximum(10000000); |
224 | 257 |
sentSpinner.setSelection(1); |
225 |
gdata = new GridData(SWT.FILL, SWT.CENTER, false, true); |
|
258 |
|
|
259 |
gdata = new GridData(SWT.FILL, SWT.CENTER, false, false); |
|
226 | 260 |
sentSpinner.setLayoutData(gdata); |
227 | 261 |
sentSpinner.addSelectionListener(selChangedListener); |
228 | 262 |
|
229 | 263 |
sentCounterLabel = new Label(navigationAreaComposite, SWT.NONE); |
230 |
gdata = new GridData(SWT.FILL, SWT.CENTER, true, true);
|
|
264 |
gdata = new GridData(SWT.FILL, SWT.CENTER, false, false);
|
|
231 | 265 |
sentCounterLabel.setLayoutData(gdata); |
232 | 266 |
|
267 |
new Label(navigationAreaComposite, SWT.NONE); |
|
268 |
|
|
233 | 269 |
//if (tsPerRepresentation.get(currentSelector.getEngine()).hasSubMatchesStrategy()) { |
234 |
new Label(navigationAreaComposite, SWT.NONE).setText("Sub "); //$NON-NLS-1$ |
|
270 |
Label l4 = new Label(navigationAreaComposite, SWT.NONE); |
|
271 |
l4.setText("Sub-Match "); //$NON-NLS-1$ |
|
272 |
gdata = new GridData(SWT.FILL, SWT.CENTER, false, false); |
|
273 |
l4.setLayoutData(gdata); |
|
274 |
|
|
235 | 275 |
subSpinner = new Spinner(navigationAreaComposite, SWT.BORDER); |
236 | 276 |
subSpinner.setMinimum(1); |
237 | 277 |
subSpinner.setIncrement(1); |
238 | 278 |
subSpinner.setMaximum(100000000); |
239 | 279 |
subSpinner.setSelection(1); |
240 |
gdata = new GridData(SWT.FILL, SWT.CENTER, false, true);
|
|
280 |
gdata = new GridData(SWT.FILL, SWT.CENTER, false, false);
|
|
241 | 281 |
subSpinner.setLayoutData(gdata); |
242 | 282 |
subSpinner.addSelectionListener(selChangedListener); |
243 | 283 |
|
244 | 284 |
subCounterLabel = new Label(navigationAreaComposite, SWT.NONE); |
245 |
gdata = new GridData(SWT.FILL, SWT.CENTER, true, true);
|
|
285 |
gdata = new GridData(SWT.FILL, SWT.CENTER, false, false);
|
|
246 | 286 |
subCounterLabel.setLayoutData(gdata); |
247 | 287 |
//} |
248 | 288 |
|
TXM/trunk/org.txm.treesearch.rcp/OSGI-INF/l10n/bundle.properties (revision 3537) | ||
---|---|---|
9 | 9 |
|
10 | 10 |
editor.name = Syntactic Tree |
11 | 11 |
|
12 |
page.name = Syntactic annotations |
|
12 |
page.name = Syntactic Tree |
|
13 |
page.name2 = Syntactic Annotations |
TXM/trunk/org.txm.treesearch.rcp/OSGI-INF/l10n/bundle_fr.properties (revision 3537) | ||
---|---|---|
9 | 9 |
|
10 | 10 |
editor.name = Arbre syntaxique |
11 | 11 |
|
12 |
page.name = Annotations syntaxiques |
|
12 |
page.name = Arbre syntaxique |
|
13 |
page.name2 = Annotations syntaxiques |
TXM/trunk/org.txm.treesearch.rcp/plugin.xml (revision 3537) | ||
---|---|---|
102 | 102 |
<extension |
103 | 103 |
point="org.eclipse.ui.preferencePages"> |
104 | 104 |
<page |
105 |
category="org.txm.rcp.preferences.AdvancePreferencePage"
|
|
105 |
category="org.txm.rcp.preferences.UserPreferencePage"
|
|
106 | 106 |
class="org.txm.treesearch.preferences.TreeSearchPreferencePage" |
107 | 107 |
id="org.txm.treesearch.preferences.TreeSearchPreferencePage" |
108 | 108 |
name="%page.name"> |
109 | 109 |
</page> |
110 |
<page |
|
111 |
category="org.txm.rcp.preferences.AdvancePreferencePage" |
|
112 |
class="org.txm.treesearch.preferences.SyntacticAnnotationPreferencePage" |
|
113 |
id="org.txm.treesearch.preferences.SyntacticAnnotationPreferencePage" |
|
114 |
name="%page.name2"> |
|
115 |
</page> |
|
110 | 116 |
</extension> |
111 | 117 |
|
112 | 118 |
</plugin> |
TXM/trunk/org.txm.treesearch.core/src/org/txm/treesearch/preferences/TreeSearchPreferences.java (revision 3537) | ||
---|---|---|
14 | 14 |
|
15 | 15 |
public static final String VERSION = "version"; |
16 | 16 |
|
17 |
public static final String DEFAULT_REPRESENTATION = "default_representation";
|
|
17 |
public static final String DEFAULT_VISUALISATION = "default_representation";
|
|
18 | 18 |
|
19 | 19 |
public static final String TFEATURE = "t"; |
20 | 20 |
|
... | ... | |
37 | 37 |
super.initializeDefaultPreferences(); |
38 | 38 |
|
39 | 39 |
Preferences preferences = this.getDefaultPreferencesNode(); |
40 |
preferences.put(DEFAULT_REPRESENTATION, "TIGER");
|
|
40 |
preferences.put(DEFAULT_VISUALISATION, "TIGER");
|
|
41 | 41 |
preferences.put(TFEATURE, "word"); |
42 | 42 |
preferences.put(NTFEATURE, "cat"); |
43 | 43 |
} |
TXM/trunk/org.txm.treesearch.core/src/org/txm/treesearch/function/TreeSearch.java (revision 3537) | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import java.io.File; |
4 | 4 |
|
5 |
import org.eclipse.osgi.util.NLS; |
|
5 | 6 |
import org.txm.core.preferences.TXMPreferences; |
6 | 7 |
import org.txm.core.results.Parameter; |
7 | 8 |
import org.txm.core.results.TXMParameters; |
... | ... | |
104 | 105 |
return "no query"; |
105 | 106 |
} |
106 | 107 |
|
108 |
public String getComputingStartMessage() { |
|
109 |
return NLS.bind("Rendering {0} Syntactic Tree...", this.getSelector().getEngine()); |
|
110 |
} |
|
111 |
|
|
107 | 112 |
public abstract int getSub(int matchNo); |
108 | 113 |
|
109 | 114 |
public final String getT() { |
TXM/trunk/org.txm.perl.rcp/src/org/txm/perl/rcp/messages/Messages.java (revision 3537) | ||
---|---|---|
1 | 1 |
package org.txm.perl.rcp.messages; |
2 | 2 |
|
3 | 3 |
import org.eclipse.osgi.util.NLS; |
4 |
import org.txm.rcp.messages.TXMUIMessages; |
|
5 |
import org.txm.utils.messages.Utf8NLS; |
|
4 | 6 |
|
5 | 7 |
public class Messages extends NLS { |
6 | 8 |
|
7 |
private static final String BUNDLE_NAME = "org.txm.perl.rcp.preferences.messages"; //$NON-NLS-1$
|
|
9 |
private static final String BUNDLE_NAME = "org.txm.perl.rcp.messages"; //$NON-NLS-1$ |
|
8 | 10 |
|
9 | 11 |
public static String ExecutableAdditionalParameters; |
10 | 12 |
|
11 | 13 |
public static String ExecutableName; |
12 | 14 |
|
13 | 15 |
public static String HomeFolder; |
16 |
|
|
14 | 17 |
static { |
15 | 18 |
// initialize resource bundle |
16 |
NLS.initializeMessages(BUNDLE_NAME, Messages.class);
|
|
19 |
Utf8NLS.initializeMessages(BUNDLE_NAME, TXMUIMessages.class);
|
|
17 | 20 |
} |
18 | 21 |
|
19 | 22 |
private Messages() {} |
TXM/trunk/org.txm.perl.rcp/src/org/txm/perl/rcp/preferences/PerlPreferencesPage.java (revision 3537) | ||
---|---|---|
26 | 26 |
|
27 | 27 |
@Override |
28 | 28 |
protected void createFieldEditors() { |
29 |
|
|
29 | 30 |
this.addField(new StringFieldEditor(PerlPreferences.HOME, Messages.HomeFolder, this.getFieldEditorParent())); |
30 | 31 |
|
31 | 32 |
this.addField(new StringFieldEditor(PerlPreferences.EXECUTABLE_NAME, Messages.ExecutableName, this.getFieldEditorParent())); |
TXM/trunk/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TIGERSearchEngine.java (revision 3537) | ||
---|---|---|
31 | 31 |
import org.txm.searchengine.cqp.corpus.CQPCorpus; |
32 | 32 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
33 | 33 |
import org.txm.tigersearch.preferences.TigerSearchPreferences; |
34 |
import org.txm.tigersearch.preferences.TigerSearchTreePreferences; |
|
34 | 35 |
import org.txm.utils.DeleteDir; |
35 | 36 |
import org.txm.utils.io.IOUtils; |
36 | 37 |
import org.txm.utils.logger.Log; |
... | ... | |
213 | 214 |
MappedByteBuffer offsetsMapped = tcorpus.getOffsetsMapped(); |
214 | 215 |
// MappedByteBuffer offsetsMapped = tcorpus.getOffsetsMapped(); |
215 | 216 |
|
216 |
boolean useSubMatches = TigerSearchPreferences.getInstance().getBoolean(TigerSearchPreferences.USESUBMATCHES);
|
|
217 |
boolean useSubMatches = TigerSearchTreePreferences.getInstance().getBoolean(TigerSearchTreePreferences.USESUBMATCHES);
|
|
217 | 218 |
|
218 | 219 |
// System.out.println("submatchSize: "+subsize); |
219 | 220 |
for (int isentMatch = 0; isentMatch < size; isentMatch++) { // the matching sentences |
TXM/trunk/org.txm.tigersearch.rcp/src/org/txm/tigersearch/rcp/preferences/TigerSearchPreferencePage.java (revision 3537) | ||
---|---|---|
1 | 1 |
package org.txm.tigersearch.rcp.preferences; |
2 | 2 |
|
3 |
import org.eclipse.jface.preference.BooleanFieldEditor; |
|
4 | 3 |
import org.eclipse.jface.preference.StringFieldEditor; |
5 | 4 |
import org.eclipse.ui.IWorkbench; |
6 | 5 |
import org.txm.rcp.IImageKeys; |
... | ... | |
18 | 17 |
|
19 | 18 |
@Override |
20 | 19 |
public void createFieldEditors() { |
21 |
this.addField(new BooleanFieldEditor(TigerSearchPreferences.USESUBMATCHES, "Use sub matches", this.getFieldEditorParent())); |
|
22 | 20 |
|
23 | 21 |
this.addField(new StringFieldEditor(TigerSearchPreferences.DRIVER_FILENAME, "TIGER-XML driver filename during import", this.getFieldEditorParent())); |
24 | 22 |
|
25 | 23 |
this.addField(new StringFieldEditor(TigerSearchPreferences.FEATURE_VALUES_TO_IGNORE_IN_HEADER, "Comma separated list of feature values to not declare in the TIGER header during import", this.getFieldEditorParent())); |
26 |
|
|
27 |
this.addField(new StringFieldEditor(TigerSearchPreferences.TFEATURE, "Default T feature to show ", this.getFieldEditorParent())); |
|
28 |
|
|
29 |
this.addField(new StringFieldEditor(TigerSearchPreferences.NTFEATURE, "Default NT feature to show", this.getFieldEditorParent())); |
|
30 | 24 |
} |
31 | 25 |
|
32 | 26 |
/* |
... | ... | |
37 | 31 |
*/ |
38 | 32 |
@Override |
39 | 33 |
public void init(IWorkbench workbench) { |
34 |
|
|
40 | 35 |
this.setPreferenceStore(new TXMPreferenceStore(TigerSearchPreferences.getInstance().getPreferencesNodeQualifier())); |
41 |
this.setDescription("TIGERSearch"); |
|
36 |
|
|
42 | 37 |
this.setImageDescriptor(IImageKeys.getImageDescriptor(this.getClass(), "icons/functions/TS.png")); |
43 | 38 |
} |
44 | 39 |
} |
TXM/trunk/org.txm.tigersearch.rcp/src/org/txm/tigersearch/rcp/preferences/TigerSearchTreePreferencePage.java (revision 3537) | ||
---|---|---|
1 |
package org.txm.tigersearch.rcp.preferences; |
|
2 |
|
|
3 |
import org.eclipse.jface.preference.BooleanFieldEditor; |
|
4 |
import org.eclipse.jface.preference.StringFieldEditor; |
|
5 |
import org.eclipse.ui.IWorkbench; |
|
6 |
import org.txm.rcp.IImageKeys; |
|
7 |
import org.txm.rcp.preferences.TXMPreferencePage; |
|
8 |
import org.txm.rcp.preferences.TXMPreferenceStore; |
|
9 |
import org.txm.tigersearch.preferences.TigerSearchTreePreferences; |
|
10 |
|
|
11 |
/** |
|
12 |
* TIGERSearch tree preferences page |
|
13 |
* |
|
14 |
* @author mdecorde |
|
15 |
* |
|
16 |
*/ |
|
17 |
public class TigerSearchTreePreferencePage extends TXMPreferencePage { |
|
18 |
|
|
19 |
@Override |
|
20 |
public void createFieldEditors() { |
|
21 |
|
|
22 |
this.addField(new StringFieldEditor(TigerSearchTreePreferences.TFEATURE, "Default T feature to show ", this.getFieldEditorParent())); |
|
23 |
|
|
24 |
this.addField(new StringFieldEditor(TigerSearchTreePreferences.NTFEATURE, "Default NT feature to show", this.getFieldEditorParent())); |
|
25 |
|
|
26 |
this.addField(new BooleanFieldEditor(TigerSearchTreePreferences.USESUBMATCHES, "Use sub matches", this.getFieldEditorParent())); |
|
27 |
} |
|
28 |
|
|
29 |
/* |
|
30 |
* (non-Javadoc) |
|
31 |
* |
|
32 |
* @see |
|
33 |
* org.eclipse.ui.IWorkbenchPreferencePage#init(org.eclipse.ui.IWorkbench) |
|
34 |
*/ |
|
35 |
@Override |
|
36 |
public void init(IWorkbench workbench) { |
|
37 |
|
|
38 |
this.setPreferenceStore(new TXMPreferenceStore(TigerSearchTreePreferences.getInstance().getPreferencesNodeQualifier())); |
|
39 |
this.setImageDescriptor(IImageKeys.getImageDescriptor(this.getClass(), "icons/functions/TS.png")); |
|
40 |
} |
|
41 |
} |
|
0 | 42 |
TXM/trunk/org.txm.tigersearch.rcp/plugin.xml (revision 3537) | ||
---|---|---|
140 | 140 |
point="org.eclipse.ui.preferencePages"> |
141 | 141 |
<page |
142 | 142 |
category="org.txm.treesearch.preferences.TreeSearchPreferencePage" |
143 |
class="org.txm.tigersearch.rcp.preferences.TigerSearchTreePreferencePage" |
|
144 |
id="org.txm.tigersearch.rcp.preferences.TigerSearchTreePreferencePage" |
|
145 |
name="%page.name"> |
|
146 |
</page> |
|
147 |
<page |
|
148 |
category="org.txm.treesearch.preferences.SyntacticAnnotationPreferencePage" |
|
143 | 149 |
class="org.txm.tigersearch.rcp.preferences.TigerSearchPreferencePage" |
144 | 150 |
id="org.txm.tigersearch.rcp.preferences.TigerSearchPreferencePage" |
145 | 151 |
name="%page.name"> |
TXM/trunk/org.txm.searchengine.cqp.core/plugin.xml (revision 3537) | ||
---|---|---|
6 | 6 |
<SearchEngine |
7 | 7 |
class="org.txm.searchengine.cqp.CQPSearchEngine"> |
8 | 8 |
</SearchEngine> |
9 |
<SearchEngine |
|
10 |
class="org.txm.searchengine.cqp.TXTSearchEngine"> |
|
11 |
</SearchEngine> |
|
12 | 9 |
</extension> |
13 | 10 |
<extension |
14 | 11 |
point="org.eclipse.core.runtime.preferences"> |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/preferences/CoNLLUSearchPreferencePage.java (revision 3537) | ||
---|---|---|
1 |
package org.txm.conllu.rcp.preferences; |
|
2 |
|
|
3 |
import org.eclipse.jface.preference.BooleanFieldEditor; |
|
4 |
import org.eclipse.jface.preference.StringFieldEditor; |
|
5 |
import org.eclipse.ui.IWorkbench; |
|
6 |
import org.txm.conllu.core.preferences.UDPreferences; |
|
7 |
import org.txm.rcp.IImageKeys; |
|
8 |
import org.txm.rcp.preferences.TXMPreferencePage; |
|
9 |
import org.txm.rcp.preferences.TXMPreferenceStore; |
|
10 |
|
|
11 |
/** |
|
12 |
* UD preferences page |
|
13 |
* |
|
14 |
* @author mdecorde |
|
15 |
* |
|
16 |
*/ |
|
17 |
public class CoNLLUSearchPreferencePage extends TXMPreferencePage { |
|
18 |
|
|
19 |
@Override |
|
20 |
public void createFieldEditors() { |
|
21 |
this.addField(new BooleanFieldEditor(UDPreferences.IMPORT_USE_NEW_DOC_ID, "Use new odc id when importing CoNLL-U files", this.getFieldEditorParent())); |
|
22 |
this.addField(new BooleanFieldEditor(UDPreferences.KEEP_CONTRACTIONS, "Keep word contractions when importing CoNLL-U files", this.getFieldEditorParent())); |
|
23 |
this.addField(new StringFieldEditor(UDPreferences.UDPREFIX, "UD properties prefix", this.getFieldEditorParent())); |
|
24 |
this.addField(new StringFieldEditor(UDPreferences.IMPORT_HEAD_TO_PROJECT, "UD head properties to project (comma separated list)", this.getFieldEditorParent())); |
|
25 |
this.addField(new StringFieldEditor(UDPreferences.IMPORT_DEPS_TO_PROJECT, "UD deps properties to project (comma separated list)", this.getFieldEditorParent())); |
|
26 |
//this.addField(new StringFieldEditor(UDPreferences.DEFAULT_TPROPERTY, "Default T property", this.getFieldEditorParent())); |
|
27 |
//this.addField(new StringFieldEditor(UDPreferences.DEFAULT_NTPROPERTY, "Default NT property", this.getFieldEditorParent())); |
|
28 |
} |
|
29 |
|
|
30 |
/* |
|
31 |
* (non-Javadoc) |
|
32 |
* |
|
33 |
* @see |
|
34 |
* org.eclipse.ui.IWorkbenchPreferencePage#init(org.eclipse.ui.IWorkbench) |
|
35 |
*/ |
|
36 |
@Override |
|
37 |
public void init(IWorkbench workbench) { |
|
38 |
this.setPreferenceStore(new TXMPreferenceStore(UDPreferences.getInstance().getPreferencesNodeQualifier())); |
|
39 |
this.setDescription("UD"); |
|
40 |
this.setImageDescriptor(IImageKeys.getImageDescriptor(this.getClass(), "icons/functions/UD.png")); |
|
41 |
} |
|
42 |
} |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/preferences/UDTreePreferencePage.java (revision 3537) | ||
---|---|---|
1 |
package org.txm.conllu.rcp.preferences; |
|
2 |
|
|
3 |
import org.eclipse.jface.preference.BooleanFieldEditor; |
|
4 |
import org.eclipse.jface.preference.StringFieldEditor; |
|
5 |
import org.eclipse.ui.IWorkbench; |
|
6 |
import org.txm.conllu.core.preferences.UDTreePreferences; |
|
7 |
import org.txm.rcp.IImageKeys; |
|
8 |
import org.txm.rcp.preferences.TXMPreferencePage; |
|
9 |
import org.txm.rcp.preferences.TXMPreferenceStore; |
|
10 |
|
|
11 |
/** |
|
12 |
* UD Tree preferences page |
|
13 |
* |
|
14 |
* @author mdecorde |
|
15 |
* |
|
16 |
*/ |
|
17 |
public class UDTreePreferencePage extends TXMPreferencePage { |
|
18 |
|
|
19 |
@Override |
|
20 |
public void createFieldEditors() { |
|
21 |
|
|
22 |
this.addField(new StringFieldEditor(UDTreePreferences.ENGINE, "Engine", this.getFieldEditorParent())); |
|
23 |
|
|
24 |
this.addField(new BooleanFieldEditor(UDTreePreferences.PRINTCONLLUSENTENCES, "Print CoNLL-U sentence when browsing the matches", this.getFieldEditorParent())); |
|
25 |
} |
|
26 |
|
|
27 |
/* |
|
28 |
* (non-Javadoc) |
|
29 |
* |
|
30 |
* @see |
|
31 |
* org.eclipse.ui.IWorkbenchPreferencePage#init(org.eclipse.ui.IWorkbench) |
|
32 |
*/ |
|
33 |
@Override |
|
34 |
public void init(IWorkbench workbench) { |
|
35 |
|
|
36 |
this.setPreferenceStore(new TXMPreferenceStore(UDTreePreferences.getInstance().getPreferencesNodeQualifier())); |
|
37 |
this.setImageDescriptor(IImageKeys.getImageDescriptor(this.getClass(), "icons/functions/UD.png")); |
|
38 |
} |
|
39 |
} |
|
0 | 40 |
TXM/trunk/org.txm.conllu.rcp/src/org/txm/conllu/rcp/preferences/CoNLLUPreferencePage.java (revision 3537) | ||
---|---|---|
1 |
package org.txm.conllu.rcp.preferences; |
|
2 |
|
|
3 |
import org.eclipse.jface.preference.BooleanFieldEditor; |
|
4 |
import org.eclipse.jface.preference.StringFieldEditor; |
|
5 |
import org.eclipse.ui.IWorkbench; |
|
6 |
import org.txm.conllu.core.preferences.UDPreferences; |
|
7 |
import org.txm.rcp.IImageKeys; |
|
8 |
import org.txm.rcp.preferences.TXMPreferencePage; |
|
9 |
import org.txm.rcp.preferences.TXMPreferenceStore; |
|
10 |
|
|
11 |
/** |
|
12 |
* UD preferences page |
|
13 |
* |
|
14 |
* @author mdecorde |
|
15 |
* |
|
16 |
*/ |
|
17 |
public class CoNLLUPreferencePage extends TXMPreferencePage { |
|
18 |
|
|
19 |
@Override |
|
20 |
public void createFieldEditors() { |
|
21 |
|
|
22 |
this.addField(new BooleanFieldEditor(UDPreferences.IMPORT_USE_NEW_DOC_ID, "Use new odc id when importing CoNLL-U files", this.getFieldEditorParent())); |
|
23 |
this.addField(new BooleanFieldEditor(UDPreferences.KEEP_CONTRACTIONS, "Keep word contractions when importing CoNLL-U files", this.getFieldEditorParent())); |
|
24 |
this.addField(new StringFieldEditor(UDPreferences.UDPREFIX, "UD properties prefix", this.getFieldEditorParent())); |
|
25 |
this.addField(new StringFieldEditor(UDPreferences.IMPORT_HEAD_TO_PROJECT, "UD head properties to project (comma separated list)", this.getFieldEditorParent())); |
|
26 |
this.addField(new StringFieldEditor(UDPreferences.IMPORT_DEPS_TO_PROJECT, "UD deps properties to project (comma separated list)", this.getFieldEditorParent())); |
|
27 |
} |
|
28 |
|
|
29 |
/* |
|
30 |
* (non-Javadoc) |
|
31 |
* |
|
32 |
* @see |
|
33 |
* org.eclipse.ui.IWorkbenchPreferencePage#init(org.eclipse.ui.IWorkbench) |
|
34 |
*/ |
|
35 |
@Override |
|
36 |
public void init(IWorkbench workbench) { |
|
37 |
|
|
38 |
this.setPreferenceStore(new TXMPreferenceStore(UDPreferences.getInstance().getPreferencesNodeQualifier())); |
|
39 |
this.setImageDescriptor(IImageKeys.getImageDescriptor(this.getClass(), "icons/functions/UD.png")); |
|
40 |
} |
|
41 |
} |
|
0 | 42 |
TXM/trunk/org.txm.conllu.rcp/res/org/txm/conllu/rcp/js/conllu.js (revision 3537) | ||
---|---|---|
1 |
// -*- Mode: JavaScript; tab-width: 4; indent-tabs-mode: nil; -*- |
|
2 |
// vim:set ft=javascript ts=4 sw=4 sts=4 cindent: |
|
3 |
|
|
4 |
/* |
|
5 |
CoNLL-U format library for JavaScript. |
|
6 |
Home: http://github.com/spyysalo/conllu.js |
|
7 |
Format: http://universaldependencies.github.io/docs/format.html |
|
8 |
|
|
9 |
Author: Sampo Pyysalo |
|
10 |
License: MIT (http://opensource.org/licenses/MIT) |
|
11 |
*/ |
|
12 |
|
|
13 |
var ConllU = (function(window, undefined) { |
|
14 |
|
|
15 |
/* |
|
16 |
* ConllU.Document: represents CoNLL-U document |
|
17 |
*/ |
|
18 |
|
|
19 |
var Document = function() { |
|
20 |
this.reset(); |
|
21 |
}; |
|
22 |
|
|
23 |
Document.prototype.reset = function() { |
|
24 |
this.sentences = []; |
|
25 |
this.error = false; |
|
26 |
this.logger = function(s) { /* no-op */ }; |
|
27 |
this.strict = null; // pick heuristically |
|
28 |
}; |
|
29 |
|
|
30 |
Document.prototype.log = function(message) { |
|
31 |
this.logger(message); |
|
32 |
}; |
|
33 |
|
|
34 |
Document.prototype.logError = function(message) { |
|
35 |
this.log('error: ' + message); |
|
36 |
this.error = true; |
|
37 |
}; |
|
38 |
|
|
39 |
/* Parse CoNLL-U format, return Document. |
|
40 |
* (see http://universaldependencies.github.io/docs/format.html) |
|
41 |
* |
|
42 |
* CoNLL-U files contain three types of lines: |
|
43 |
* 1. Word lines |
|
44 |
* 2. Blank lines marking sentence boundaries |
|
45 |
* 3. Comment lines starting with a hash ("#") |
|
46 |
* |
|
47 |
* Each word line has the following format |
|
48 |
* 1. ID: Word index, integer starting at 1 for each new sentence; |
|
49 |
* may be a range for tokens with multiple words; may be a decimal |
|
50 |
* number for empty nodes. |
|
51 |
* 2. FORM: Word form or punctuation symbol. |
|
52 |
* 3. LEMMA: Lemma or stem of word form. |
|
53 |
* 4. UPOSTAG: Universal part-of-speech tag. |
|
54 |
* 5. XPOSTAG: Language-specific part-of-speech tag; underscore |
|
55 |
* if not available. |
|
56 |
* 6. FEATS: List of morphological features from the Universal |
|
57 |
* feature inventory or from a defined language-specific extension; |
|
58 |
* underscore if not available. |
|
59 |
* 7. HEAD: Head of the current token, which is either a value of ID |
|
60 |
* or zero (0). |
|
61 |
* 8. DEPREL: Universal Stanford dependency relation to the HEAD |
|
62 |
* (root iff HEAD = 0) or a defined language-specific subtype |
|
63 |
* of one. |
|
64 |
* 9. DEPS: List of secondary dependencies (head-deprel pairs). |
|
65 |
* 10. MISC: Any other annotation. |
|
66 |
*/ |
|
67 |
// Parse CoNLL-U text into this document, replacing any previous state.
//
// input:  string holding the CoNLL-U data.
// logger: optional; when given (not undefined) it replaces this.logger.
// strict: optional; when given (not undefined) it replaces this.strict.
//         If this.strict is null afterwards, the mode is chosen by
//         selectParsingMode().
//
// Returns this document. Per-line problems are reported through
// this.logError and additionally set this.error to true.
Document.prototype.parse = function(input, logger, strict) {
    // discard previous state, if any
    this.reset();

    if (logger !== undefined) {
        this.logger = logger;
    }
    if (strict !== undefined) {
        this.strict = strict;
    }

    // Accept Unix (\n), Windows (\r\n) and bare \r line endings so
    // that no stray carriage return ends up in the last field of a
    // line or makes a blank separator line look non-empty.
    var lines = input.split(/\r\n|\r|\n/);

    if (this.strict === null) {
        this.strict = selectParsingMode(input, this.logger);
    }

    // select splitter to use for dividing the lines into fields.
    var splitter = selectFieldSplitter(input, this.logger, this.strict);

    var elements = [],
        comments = [],
        beforeSentence = true,
        that = this,
        idx,
        line;

    // Report a problem for the line currently being processed. The
    // closure reads idx and line at call time, so it is built once
    // instead of once per input line.
    var logLineError = function(message) {
        that.logError('line '+(idx+1)+': '+message+' ("'+line+'")');
        that.error = true;
    };

    for (idx=0; idx<lines.length; idx++) {
        line = lines[idx];

        if (isComment(line)) {
            if (beforeSentence) {
                comments.push(line);
            } else {
                logLineError('comments must precede sentence, ignoring');
            }
            continue;
        }

        // non-comment, assume inside sentence until terminated by
        // blank line
        beforeSentence = false;

        var fields = splitter(line);

        if (fields.length === 0) {
            // empty line, terminates sentence
            if (elements.length !== 0) {
                var sId = 'S' + (this.sentences.length+1);
                var sentence = new Sentence(sId, elements, comments);
                this.sentences.push(sentence);
            } else {
                logLineError('empty sentence, ignoring');
            }
            // reset
            elements = [];
            comments = [];
            beforeSentence = true;
            continue;
        }

        if (fields.length !== 10) {
            logLineError('expected 10 fields, got '+fields.length);
            repairFields(fields, this.logger);
        }

        var element = new Element(fields, idx, line);

        var issues = element.validate();
        for (var j=0; j<issues.length; j++) {
            logLineError(issues[j]);
        }
        if (issues.length !== 0) {
            if (!element.repair(this.logger)) {
                logLineError('repair failed, discarding line');
                continue; // failed, ignore line
            }
        }

        elements.push(element);
    }

    // If elements is non-empty, last sentence ended without its
    // expected terminating empty line. Process, but warn if strict.
    if (elements.length !== 0) {
        if (this.strict) {
            this.logError('missing blank line after last sentence');
        }
        var lastId = 'S' + (this.sentences.length+1);
        var lastSentence = new Sentence(lastId, elements, comments);
        this.sentences.push(lastSentence);
        // reset
        elements = [];
        comments = [];
        beforeSentence = true;
    }

    // If comments is non-empty, there were comments after the
    // terminating empty line. Warn and discard.
    if (comments.length !== 0) {
        this.logError('comments may not occur after last sentence, '+
                      'ignoring');
    }

    return this;
};
|
175 |
|
|
176 |
// Convert the whole document into one merged brat data object.
//
// logger:       optional; when given (not undefined) it replaces
//               this.logger.
// includeEmpty: optional; when truthy, empty nodes are included.
//               Defaults to false.
//
// Each sentence is validated first and, if necessary, repaired; a
// sentence whose repair fails is logged and left out. Returns an object
// with the array-valued keys 'entities', 'attributes', 'relations',
// 'comments', 'styles' and 'sentlabels', plus 'text' (sentence texts
// joined by newlines) and 'error' (copied from this.error).
Document.prototype.toBrat = function(logger, includeEmpty) {
    if (logger !== undefined) {
        this.logger = logger;
    }
    if (includeEmpty === undefined) {
        includeEmpty = false;    // hide empty nodes by default
    }

    // merge brat data over all sentences
    var mergedBratData = {},
        textOffset = 0,
        bratData,    // declared locally; previously leaked as a global
        i, j;
    var categories = [
        'entities',
        'attributes',
        'relations',
        'comments',
        'styles',
        'sentlabels'
    ];
    for (i=0; i<categories.length; i++) {
        mergedBratData[categories[i]] = [];
    }
    mergedBratData['text'] = '';
    for (i=0; i<this.sentences.length; i++) {
        var sentence = this.sentences[i];

        var issues = sentence.validate();
        for (j=0; j<issues.length; j++) {
            this.logError(issues[j]);
        }
        if (issues.length !== 0) {
            if (!sentence.repair(this.logger)) {
                this.logError('repair failed, discarding sentence');
                continue;
            }
        }
        // the +1 accounts for the newline separator appended below
        // whenever earlier text is already present
        sentence.setBaseOffset(textOffset !== 0 ? textOffset + 1 : 0);
        bratData = sentence.toBrat(includeEmpty);

        // merge
        if (mergedBratData['text'].length !== 0) {
            mergedBratData['text'] += '\n';
            textOffset += 1;
        }
        mergedBratData['text'] += bratData['text'];
        textOffset += bratData['text'].length;
        for (j=0; j<categories.length; j++) {
            var c = categories[j];
            mergedBratData[c] = mergedBratData[c].concat(bratData[c]);
        }
    }

    // to avoid brat breakage on error, don't send empty text
    if (mergedBratData['text'].length === 0) {
        mergedBratData['text'] = '<EMPTY>';
    }

    mergedBratData['error'] = this.error;

    return mergedBratData;
};
|
237 |
|
|
238 |
/* |
|
239 |
* ConllU.Sentence: represents CoNLL-U sentence |
|
240 |
*/ |
|
241 |
|
|
242 |
// Construct a sentence from its identifier, its list of elements and
// the comment lines attached to it. The base offset starts at zero
// and can be changed later with setBaseOffset().
var Sentence = function(sentenceId, elements, comments) {
    var self = this;

    self.id = sentenceId;
    self.elements = elements;
    self.comments = comments;
    self.baseOffset = 0;
};
|
248 |
|
|
249 |
// Record the offset of the sentence's first character; bratSpans()
// starts counting from this value when generating standoff offsets.
Sentence.prototype.setBaseOffset = function(baseOffset) {
    this.baseOffset = baseOffset;
};
|
254 |
|
|
255 |
// Collect the dependencies reported by every element of the sentence
// into one flat array, in element order.
Sentence.prototype.dependencies = function() {
    return this.elements.reduce(function(collected, element) {
        return collected.concat(element.dependencies());
    }, []);
};
|
265 |
|
|
266 |
// Return the elements that are words. When includeEmpty is truthy,
// elements that are empty nodes are returned as well.
Sentence.prototype.words = function(includeEmpty) {
    var keep = function(element) {
        if (element.isWord()) {
            return true;
        }
        // empty nodes are only consulted when explicitly requested
        return includeEmpty ? element.isEmptyNode() : false;
    };

    return this.elements.filter(keep);
};
|
271 |
|
|
272 |
// Return the elements of the sentence that are multiword tokens.
Sentence.prototype.multiwords = function() {
    var isMultiword = function(element) {
        return element.isMultiword();
    };

    return this.elements.filter(isMultiword);
};
|
277 |
|
|
278 |
// Return the token sequence of the sentence. The IDs covered by any
// multiword token range are gathered first and handed to each
// element's isToken(), which decides whether the element is kept.
Sentence.prototype.tokens = function() {
    var covered = {};

    this.multiwords().forEach(function(multiword) {
        for (var id=multiword.rangeFrom(); id<=multiword.rangeTo(); id++) {
            covered[id] = true;
        }
    });

    return this.elements.filter(function(element) {
        return element.isToken(covered);
    });
};
|
293 |
|
|
294 |
// Return the words of the sentence prepared for visualization with
// brat. A word whose form is flagged by isRtl() is replaced, in the
// returned list only, by a deep copy whose form went through rtlFix().
Sentence.prototype.bratWords = function(includeEmpty) {
    var result = this.words(includeEmpty);

    result.forEach(function(word, position) {
        if (!isRtl(word.form)) {
            return;
        }
        var adjusted = deepCopy(word);
        adjusted.form = rtlFix(adjusted.form);
        result[position] = adjusted;
    });

    return result;
};
|
308 |
|
|
309 |
// Return the tokens of the sentence prepared for visualization with
// brat: every token is deep-copied and the copy's form is passed
// through rtlFix().
Sentence.prototype.bratTokens = function() {
    return this.tokens().map(function(token) {
        var adjusted = deepCopy(token);
        adjusted.form = rtlFix(adjusted.form);
        return adjusted;
    });
};
|
321 |
|
|
322 |
// Return the text of the sentence for visualization with brat: the
// word forms joined by single spaces, followed - only when it differs
// from that - by a newline and the space-joined token forms.
Sentence.prototype.bratText = function(includeEmpty) {
    var formOf = function(item) { return item.form };

    var wordItems = this.bratWords(includeEmpty);
    var tokenItems = this.bratTokens();

    var wordText = wordItems.map(formOf).join(' ');
    var tokenText = tokenItems.map(formOf).join(' ');

    if (wordText === tokenText) {
        return wordText;
    }
    return wordText + '\n' + tokenText;
};
|
337 |
|
|
338 |
// Return the annotated text spans of the sentence for visualization
// with brat: one [id, upostag, [[start, end]]] entry per word.
// Offsets begin at this.baseOffset and advance by the form length
// plus one for each word.
Sentence.prototype.bratSpans = function(includeEmpty) {
    var cursor = this.baseOffset,
        idPrefix = this.id + '-T';

    return this.bratWords(includeEmpty).map(function(word) {
        var start = cursor,
            end = start + word.form.length;
        cursor = end + 1;
        return [idPrefix + word.id, word.upostag, [[start, end]]];
    });
};
|
355 |
|
|
356 |
// Return attributes of sentence annotations for visualization with
// brat: one [attributeId, name, wordId, value] entry per name/value
// pair returned by each word's features(). Attribute IDs are numbered
// from 1 across the whole sentence.
Sentence.prototype.bratAttributes = function(includeEmpty) {
    var sentenceId = this.id,
        collected = [],
        nextAttribute = 1;

    this.words(includeEmpty).forEach(function(word) {
        var target = sentenceId + '-T' + word.id,
            pairs = word.features();

        for (var k=0; k<pairs.length; k++) {
            var attributeId = sentenceId + '-A' + nextAttribute;
            nextAttribute += 1;
            collected.push([attributeId, pairs[k][0], target, pairs[k][1]]);
        }
    });

    return collected;
};
|
377 |
|
|
378 |
// Return relations for sentence dependencies for visualization with
// brat. Each dependency dep becomes [relationId, dep[2], args], where
// 'arg1' refers to dep[1] and 'arg2' to dep[0]. Relation IDs are
// numbered from 0. The includeEmpty argument is accepted but not used.
Sentence.prototype.bratRelations = function(includeEmpty) {
    var sentenceId = this.id;

    return this.dependencies().map(function(dep, number) {
        var args = [
            [ 'arg1', sentenceId + '-T' + dep[1] ],
            [ 'arg2', sentenceId + '-T' + dep[0] ]
        ];
        return [sentenceId + '-R' + number, dep[2], args];
    });
};
|
393 |
|
|
394 |
// Return comments (notes) on sentence annotations for visualization
// with brat. Every word gets a lemma note; xpostag and misc notes
// are added only when the respective field is not '_'.
// TODO: better visualization for LEMMA, XPOSTAG, and MISC.
Sentence.prototype.bratComments = function(includeEmpty) {
    var NOTE_LABEL = 'AnnotatorNotes',
        sentenceId = this.id,
        notes = [];

    this.words(includeEmpty).forEach(function(word) {
        var target = sentenceId + '-T' + word.id;

        notes.push([target, NOTE_LABEL, 'Lemma: ' + word.lemma]);
        if (word.xpostag !== '_') {
            notes.push([target, NOTE_LABEL, 'Xpostag: ' + word.xpostag]);
        }
        if (word.misc !== '_') {
            notes.push([target, NOTE_LABEL, 'Misc: ' + word.misc]);
        }
    });

    return notes;
};
|
416 |
|
|
417 |
// Return styles on sentence annotations for visualization with
// brat. Note: this feature is an extension of both the CoNLL-U
// comment format and the basic brat data format.
//
// Styles are read from the sentence's "# visual-style ..." comment
// lines. Each returned entry is [reference, key, value], where
// reference is either a span ID string or an [id1, id2, type] array
// for a relation. The wildcard references "nodes" and "arcs" are
// expanded afterwards to every word / dependency that has no explicit
// style for the same key.
Sentence.prototype.bratStyles = function(includeEmpty) {
    var styles = [],
        wildcards = [],
        // every loop variable is declared once here; "m" used to be
        // assigned without a declaration and leaked as a global
        m, i, j, reference, key, value, r;

    for (i=0; i<this.comments.length; i++) {
        var comment = this.comments[i];

        m = comment.match(/^(\#\s*visual-style\s+)(.*)/);
        if (!m) {
            continue;
        }
        var styleSpec = m[2];

        // Attempt to parse as a visual style specification. The
        // expected format is "REF<SPACE>STYLE", where REF
        // is either a single ID (for a span), a space-separated
        // ID1 ID2 TYPE triple (for a relation), or a special
        // wildcard value like "arcs", and STYLE is either
        // a colon-separated key-value pair or a color.
        m = styleSpec.match(/^([^\t]+)\s+(\S+)\s*$/);
        if (!m) {
            // TODO: avoid console.log
            console.log('warning: failed to parse: "'+comment+'"');
            continue;
        }
        reference = m[1];
        var style = m[2];

        // split style into key and value, adding a key to
        // color-only styles as needed for the reference type.
        m = style.match(/^(\S+):(\S+)$/);
        if (m) {
            key = m[1];
            value = m[2];
        } else {
            value = style;
            if (reference === 'arcs' || reference.indexOf(' ') !== -1) {
                key = 'color';
            } else {
                key = 'bgColor';
            }
        }

        // store wildcards for separate later processing
        if (reference.match(/^(nodes|arcs)$/)) {
            wildcards.push([reference, key, value]);
            continue;
        }

        // adjust every ID in reference for brat
        if (reference.indexOf(' ') === -1) {
            reference = this.id + '-T' + reference;
        } else {
            reference = reference.split(' ');
            reference[0] = this.id + '-T' + reference[0];
            reference[1] = this.id + '-T' + reference[1];
        }

        styles.push([reference, key, value]);
    }

    // for expanding wildcards, first determine which words / arcs
    // styles have already been set, and then add the style to
    // everything that hasn't.
    // The lookup key is the reference concatenated with the style key,
    // converted to a string when used as a property name.
    var setStyle = {};
    for (i=0; i<styles.length; i++) {
        setStyle[styles[i][0].concat([styles[i][1]])] = true;
    }
    for (i=0; i<wildcards.length; i++) {
        reference = wildcards[i][0];
        key = wildcards[i][1];
        value = wildcards[i][2];
        if (reference === 'nodes') {
            var words = this.words(includeEmpty);
            for (j=0; j<words.length; j++) {
                r = this.id + '-T' + words[j].id;
                if (!setStyle[r.concat([key])]) {
                    styles.push([r, key, value]);
                    setStyle[r.concat([key])] = true;
                }
            }
        } else if (reference === 'arcs') {
            var deps = this.dependencies();
            for (j=0; j<deps.length; j++) {
                r = [this.id + '-T' + deps[j][1],
                     this.id + '-T' + deps[j][0],
                     deps[j][2]];
                if (!setStyle[r.concat([key])]) {
                    styles.push([r, key, value]);
                    setStyle[r.concat([key])] = true;
                }
            }
        } else {
            console.log('internal error');
        }
    }

    return styles;
};
|
519 |
|
|
520 |
// Return label of sentence for visualization with brat, or null
// if not defined. Note: this feature is an extension of both the
// CoNLL-U comment format and the basic brat data format.
//
// The label is the trimmed text following "# sentence-label" in the
// sentence's comments; when several such comments exist, the last one
// wins.
Sentence.prototype.bratLabel = function() {
    var label = null,
        m;    // declared locally; previously leaked as a global

    for (var i=0; i<this.comments.length; i++) {
        var comment = this.comments[i];

        m = comment.match(/^(\#\s*sentence-label\b)(.*)/);
        if (!m) {
            continue;
        }
        label = m[2].trim();
    }
    return label;
};
|
537 |
|
|
538 |
// Return representation of sentence in brat embedded format (see
// http://brat.nlplab.org/embed.html).
// If includeEmpty is truthy, include empty nodes in the representation.
// Note: "styles" is an extension, not part of the basic format.
// 'sentlabels' is a one-element array holding the value of bratLabel().
Sentence.prototype.toBrat = function(includeEmpty) {
    // property values are evaluated top to bottom, which keeps the
    // helper calls in the same order as their keys
    return {
        'text': this.bratText(includeEmpty),
        'entities': this.bratSpans(includeEmpty),
        'attributes': this.bratAttributes(includeEmpty),
        'relations': this.bratRelations(includeEmpty),
        'comments': this.bratComments(includeEmpty),
        'styles': this.bratStyles(includeEmpty),
        'sentlabels': [this.bratLabel()]
    };
};
|
561 |
|
|
562 |
// Build and return a lookup object mapping each element's id to the
// element. When an id occurs more than once, the later element wins.
Sentence.prototype.elementById = function() {
    var lookup = {};

    this.elements.forEach(function(element) {
        lookup[element.id] = element;
    });

    return lookup;
};
|
571 |
|
|
572 |
// Append a formatted message for the given issue to the issues array.
// The message carries the element's 1-based line number
// (element.lineidx + 1) and its original line text.
Sentence.prototype.addError = function(issue, element, issues) {
    var lineNumber = element.lineidx + 1;
    var message = 'line ' + lineNumber + ': ' + issue +
                  ' ("' + element.line + '")';

    issues.push(message);
};
|
575 |
|
|
576 |
// Check validity of the sentence by running every individual
// validator in a fixed order. Return the list of strings describing
// the issues found (empty list if none).
Sentence.prototype.validate = function() {
    var found = [];
    var validators = [
        'validateUniqueIds',
        'validateWordSequence',
        'validateMultiwordSequence',
        'validateEmptyNodeSequence',
        'validateReferences'
    ];

    for (var v=0; v<validators.length; v++) {
        this[validators[v]](found);
    }

    return found;
};
|
589 |
|
|
590 |
// Check for presence of ID duplicates. An issue is appended to
// issues (a fresh array when omitted) for every element whose id was
// already seen. Returns true when no issue was added.
Sentence.prototype.validateUniqueIds = function(issues) {
    if (issues === undefined) {
        issues = [];
    }

    var countBefore = issues.length,
        seen = {};

    for (var pos=0; pos<this.elements.length; pos++) {
        var current = this.elements[pos],
            alreadySeen = seen[current.id] !== undefined;

        if (alreadySeen) {
            this.addError('non-unique ID "'+current.id+'"',
                          current, issues);
        }
        seen[current.id] = current;
    }

    return countBefore === issues.length;
};
|
608 |
|
|
609 |
// Check validity of word ID sequence (should be 1,2,3,...).
// Multiword tokens and empty nodes are skipped. After each word the
// expectation continues from that word's own numeric ID, so a single
// gap is reported once. Returns true when no issue was added to
// issues (a fresh array when omitted).
Sentence.prototype.validateWordSequence = function(issues) {
    if (issues === undefined) {
        issues = [];
    }

    var countBefore = issues.length,
        wanted = 1;

    for (var pos=0; pos<this.elements.length; pos++) {
        var current = this.elements[pos];

        // only check simple word sequence here
        var isPlainWord = !(current.isMultiword() || current.isEmptyNode());
        if (!isPlainWord) {
            continue;
        }

        var numericId = parseInt(current.id, 10);
        if (numericId !== wanted) {
            this.addError('word IDs should be 1,2,3,..., ' +
                          'expected '+wanted+', got '+current.id,
                          current, issues);
        }
        wanted = numericId + 1;
    }

    return countBefore === issues.length;
};
|
633 |
|
|
634 |
// Check that multiword token ranges are valid: a multiword token's
// range must start at the ID expected next. For every element that
// does not trigger the error, the expected ID becomes the integer
// prefix of that element's ID plus one. Returns true when no issue
// was added to issues (a fresh array when omitted).
Sentence.prototype.validateMultiwordSequence = function(issues) {
    if (issues === undefined) {
        issues = [];
    }

    var countBefore = issues.length,
        wanted = 1;

    for (var pos=0; pos<this.elements.length; pos++) {
        var current = this.elements[pos];
        var misplaced = current.isMultiword() &&
                        current.rangeFrom() !== wanted;

        if (misplaced) {
            this.addError('multiword tokens must appear before '+
                          'first word in their range',
                          current, issues);
            continue;
        }
        wanted = parseInt(current.id, 10) + 1;
    }

    return countBefore === issues.length;
};
|
655 |
|
|
656 |
// Check that empty node IDs are numbered <word>.1, <word>.2, ...
// after the most recent word (or after '0' when no word has been seen
// yet). Returns true when no issue was added to issues (a fresh
// array when omitted).
Sentence.prototype.validateEmptyNodeSequence = function(issues) {
    if (issues === undefined) {
        issues = [];
    }

    var countBefore = issues.length;
    var lastWordId = '0'; // TODO check https://github.com/UniversalDependencies/docs/issues/382
    var emptySeq = 1;

    for (var pos=0; pos<this.elements.length; pos++) {
        var current = this.elements[pos];

        if (current.isWord()) {
            // a new word restarts the numbering of empty nodes
            lastWordId = current.id;
            emptySeq = 1;
            continue;
        }
        if (!current.isEmptyNode()) {
            continue;
        }

        var wanted = lastWordId + '.' + emptySeq;
        if (current.id !== wanted) {
            this.addError('empty node IDs should be *.1, *.2, ... ' +
                          'expected '+wanted+', got '+current.id,
                          current, issues);
        }
        emptySeq += 1;
    }

    return countBefore === issues.length;
};
|
682 |
|
|
683 |
// Check validity of ID references in HEAD and DEPS. HEAD is checked
// through the element's validHeadReference(); every head taken from
// element.dependencies(true) must be '0' or the ID of an element of
// this sentence. Returns true when no issue was added to issues
// (a fresh array when omitted).
Sentence.prototype.validateReferences = function(issues) {
    if (issues === undefined) {
        issues = [];
    }

    var countBefore = issues.length,
        known = this.elementById();

    for (var pos=0; pos<this.elements.length; pos++) {
        var current = this.elements[pos];

        // validate HEAD
        var headOk = current.validHeadReference(known);
        if (!headOk) {
            this.addError('HEAD is not valid ID: "'+current.head+'"',
                          current, issues);
        }

        // validate DEPS
        var deps = current.dependencies(true);
        for (var d=0; d<deps.length; d++) {
            var depHead = deps[d][1];
            if (depHead === '0' || known[depHead] !== undefined) {
                continue;
            }
            this.addError('invalid ID "'+depHead+'" in DEPS',
                          current, issues);
        }
    }

    return countBefore === issues.length;
};
|
712 |
|
|
713 |
// Attempt to repair the sentence: each validator is run in turn and,
// when it reports a problem, the matching repair method is invoked
// with the logger (nullLogger when log is omitted). Returns true when
// a final full validation finds no issues.
Sentence.prototype.repair = function(log) {
    if (log === undefined) {
        log = nullLogger;
    }

    // [validator, repairer] pairs, applied in this order
    var steps = [
        ['validateUniqueIds', 'repairUniqueIds'],
        ['validateWordSequence', 'repairWordSequence'],
        ['validateMultiwordSequence', 'repairMultiwordSequence'],
        ['validateEmptyNodeSequence', 'repairEmptyNodeSequence'],
        ['validateReferences', 'repairReferences']
    ];

    for (var s=0; s<steps.length; s++) {
        var check = steps[s][0],
            fix = steps[s][1];
        if (!this[check]()) {
            this[fix](log);
        }
    }

    return this.validate().length === 0;
};
|
739 |
|
|
740 |
// Remove elements whose ID duplicates that of an earlier element,
// keeping the first occurrence and logging each removal through log
// (nullLogger when omitted). Always returns true.
Sentence.prototype.repairUniqueIds = function(log) {
    if (log === undefined) {
        log = nullLogger;
    }

    var seen = {};

    this.elements = this.elements.filter(function(element) {
        if (seen[element.id] === undefined) {
            seen[element.id] = element;
            return true;
        }
        log('repair: remove element with duplicate ID "'+element.id+'"');
        return false;
    });

    return true;
};
|
759 |
|
|
760 |
// Placeholder for repairing the word ID sequence: not implemented
// yet, it only logs a TODO message and returns true.
// log defaults to nullLogger when omitted, as in the other repair
// methods, so a direct call without a logger does not throw.
Sentence.prototype.repairWordSequence = function(log) {
    log = (log !== undefined ? log : nullLogger);

    log('TODO: implement ConllU.Sentence.repairWordSequence()');
    return true;
};
|
764 |
|
|
765 |
// Placeholder for repairing multiword token ranges: not implemented
// yet, it only logs a TODO message and returns true.
// log defaults to nullLogger when omitted, as in the other repair
// methods, so a direct call without a logger does not throw.
Sentence.prototype.repairMultiwordSequence = function(log) {
    log = (log !== undefined ? log : nullLogger);

    log('TODO: implement ConllU.Sentence.repairMultiwordSequence()');
    return true;
};
|
769 |
|
|
770 |
// Placeholder for repairing empty node numbering: not implemented
// yet, it only logs a TODO message and returns true.
// log defaults to nullLogger when omitted, as in the other repair
// methods, so a direct call without a logger does not throw.
Sentence.prototype.repairEmptyNodeSequence = function(log) {
    log = (log !== undefined ? log : nullLogger);

    log('TODO: implement ConllU.Sentence.repairEmptyNodeSequence()');
    return true;
};
|
774 |
|
|
775 |
Sentence.prototype.repairReferences = function(log) { |
|
776 |
log = (log !== undefined ? log : nullLogger); |
|
777 |
|
|
778 |
var elementById = this.elementById(); |
|
779 |
|
|
780 |
for (var i=0; i<this.elements.length; i++) { |
|
781 |
var element = this.elements[i]; |
|
782 |
|
|
783 |
// repair HEAD if not valid |
|
784 |
if (!element.validHeadReference(elementById)) { |
|
785 |
log('repair: blanking invalid HEAD'); |
|
786 |
element.head = null; |
|
787 |
} |
|
788 |
|
|
789 |
// repair DEPS if not valid |
|
790 |
if (element.deps === '_') { |
|
791 |
continue; |
|
792 |
} |
|
793 |
var deparr = element.deps.split('|'), |
|
794 |
filtered = []; |
|
795 |
for (var j=0; j<deparr.length; j++) { |
|
796 |
var dep = deparr[j]; |
|
797 |
var m = dep.match(dependencyRegex); |
|
798 |
if (m) { |
|
799 |
var head = m[1], deprel = m[2]; |
|
800 |
if (head === '0' || elementById[head] !== undefined) { |
|
801 |
filtered.push(dep); |
|
802 |
} else { |
Formats disponibles : Unified diff