Révision 713
tmp/org.txm.tigersearch.rcp/build.properties (revision 713) | ||
---|---|---|
4 | 4 |
META-INF/,\ |
5 | 5 |
.,\ |
6 | 6 |
icons/,\ |
7 |
lib/ |
|
7 |
lib/,\ |
|
8 |
lib/dom4j-1.6.1.jar,\ |
|
9 |
lib/log4j-1.2.12.jar,\ |
|
10 |
lib/TigerSearch.jar |
tmp/org.txm.tigersearch.rcp/.classpath (revision 713) | ||
---|---|---|
15 | 15 |
<accessrule kind="accessible" pattern="**"/> |
16 | 16 |
</accessrules> |
17 | 17 |
</classpathentry> |
18 |
<classpathentry exported="true" kind="con" path="GROOVY_SUPPORT"/> |
|
19 |
<classpathentry exported="true" kind="con" path="GROOVY_DSL_SUPPORT"/> |
|
18 | 20 |
<classpathentry kind="output" path="bin"/> |
19 | 21 |
</classpath> |
tmp/org.txm.tigersearch.rcp/META-INF/MANIFEST.MF (revision 713) | ||
---|---|---|
323 | 323 |
org.mozilla.javascript.tools.shell, |
324 | 324 |
org.relaxng.datatype, |
325 | 325 |
org.relaxng.datatype.helpers, |
326 |
org.txm.export.ts, |
|
326 | 327 |
org.txm.function.tigersearch, |
327 | 328 |
org.txm.importer.srcmf, |
328 | 329 |
org.txm.importer.srcmf2, |
... | ... | |
343 | 344 |
tigersearch4txm.handlers |
344 | 345 |
Import-Package: ims.tiger.gui.tigergraphviewer.forest |
345 | 346 |
Bundle-Vendor: Textometrie.org |
347 |
Bundle-ClassPath: lib/dom4j-1.6.1.jar, |
|
348 |
lib/log4j-1.2.12.jar, |
|
349 |
lib/TigerSearch.jar, |
|
350 |
. |
tmp/org.txm.tigersearch.rcp/.project (revision 713) | ||
---|---|---|
22 | 22 |
</buildCommand> |
23 | 23 |
</buildSpec> |
24 | 24 |
<natures> |
25 |
<nature>org.eclipse.jdt.groovy.core.groovyNature</nature> |
|
25 | 26 |
<nature>org.eclipse.pde.PluginNature</nature> |
26 | 27 |
<nature>org.eclipse.jdt.core.javanature</nature> |
27 | 28 |
</natures> |
tmp/org.txm.tigersearch.rcp/src/org/txm/test/DrawTSSVG.groovy (revision 713) | ||
---|---|---|
1 |
/**
 * Test script: runs a TIGERSearch query on a corpus and writes the
 * sub-graphs of the first matches as SVG files.
 */
|
6 |
// Copyright © 2010-2013 ENS de Lyon. |
|
7 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
8 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
9 |
// Sophia Antipolis, University of Paris 3. |
|
10 |
// |
|
11 |
// The TXM platform is free software: you can redistribute it |
|
12 |
// and/or modify it under the terms of the GNU General Public |
|
13 |
// License as published by the Free Software Foundation, |
|
14 |
// either version 2 of the License, or (at your option) any |
|
15 |
// later version. |
|
16 |
// |
|
17 |
// The TXM platform is distributed in the hope that it will be |
|
18 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
19 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
20 |
// PURPOSE. See the GNU General Public License for more |
|
21 |
// details. |
|
22 |
// |
|
23 |
// You should have received a copy of the GNU General |
|
24 |
// Public License along with the TXM platform. If not, see |
|
25 |
// http://www.gnu.org/licenses. |
|
26 |
// |
|
27 |
// |
|
28 |
// |
|
29 |
// $LastChangedDate:$ |
|
30 |
// $LastChangedRevision:$ |
|
31 |
// $LastChangedBy:$ |
|
32 |
// |
|
33 |
package org.txm.test |
|
34 |
|
|
35 |
import org.txm.Toolbox |
|
36 |
import org.txm.searchengine.ts.* |
|
37 |
import org.txm.utils.ExecTimer |
|
38 |
|
|
39 |
|
|
40 |
// Manual test script: opens a TIGERSearch corpus, runs one query and dumps the
// sub-graphs of the first two matches as SVG files and HTML on stdout.

String userhome = System.getProperty("user.home")

// Every path is derived from the current user's home directory; the TXM home
// used to be hard-coded to one developer's account ("/home/mdecorde/TXM/").
File txmHome = new File(userhome, "TXM")
File tigerDir = new File(txmHome, "Tiger")
Toolbox.setParam(Toolbox.USER_TXM_HOME, txmHome.getPath() + "/")

File configdir = new File(tigerDir, "tigersearch.logprop")
File registrydir = new File(tigerDir, "corpora/")
String id = "GRAAL"
String query = """#n:[cat = "Obj"] >* #m & arity(#n, 2, 10)"""
//String query = "[]"

// Writes the sub-graph currently selected in `match` to an SVG file under
// tigerDir, then prints its HTML rendering.
def dumpSubGraph = { TSMatch match, String fileName ->
    match.toSVGFile(new File(tigerDir, fileName))
    println match.toHTML() + "<br>"
}

TSCorpusManager manager = new TSCorpusManager(registrydir, configdir)
if (manager.isInitialized()) {

    ExecTimer.start()
    TSCorpus corpus = manager.getCorpus(id)

    println "T features: " + corpus.getTFeatures()
    println "NT features: " + corpus.getNTFeatures()

    TSResult result = corpus.query(query)

    result.setDisplayProperties(["word", "pos", "form"], "type")

    // First match: first three sub-graphs.
    TSMatch first = result.getFirst()
    println "First: nb of sub graph: " + first.getNumberOfSubGraph()
    first.firstSubGraph()
    dumpSubGraph(first, "result_1_1.svg")
    first.nextSubGraph()
    dumpSubGraph(first, "result_1_2.svg")
    first.nextSubGraph()
    dumpSubGraph(first, "result_1_3.svg")

    // Second match: three sub-graphs forward, then one step back.
    TSMatch next = result.getNext()
    println "Next: nb of sub graph: " + next.getNumberOfSubGraph()
    next.firstSubGraph()
    dumpSubGraph(next, "result_2_1.svg")
    next.nextSubGraph()
    dumpSubGraph(next, "result_2_2.svg")
    next.nextSubGraph()
    dumpSubGraph(next, "result_2_3.svg")
    next.previousSubGraph()
    dumpSubGraph(next, "result_2_2bis.svg")

    // Disabled export experiments, kept for reference:
    // println "make some room :)"
    // for(File f : new File("/home/mdecorde/Bureau/tigerexports/").listFiles())
    // f.delete()
    //
    // println "SAVE XML"
    // result.toXml(new File("/home/mdecorde/Bureau/tigerexports/result.xml"), false, true)
    //
    // println "SAVE AS SVG"
    // for (int i = 0 ; i < result.getNumberOfMatch() && i < 10; i++) {
    // result.getMatch(i).toSVGFile(new File("/home/mdecorde/Bureau/tigerexports/match_"+i+".svg"))
    // }
    //
    // println "SIMPLE NO PNC"
    // println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export1.csv"), "concordance_simple", 30, ["cat"], ["pos"], false);
    // println "MOT-PIVOT NO PNC"
    // println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export2.csv"), "concordance_mot-pivot", 30, ["cat"], ["pos"], false);
    // println "BLOCKS NO PNC"
    // println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export3.csv"), "concordance_blocks", 30, ["cat"], ["pos"], false);
    //
    // println "SIMPLE + PNC"
    // println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export12.csv"), "concordance_simple", 30, ["cat"], ["pos"], true);
    // println "MOT-PIVOT + PNC"
    // println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export22.csv"), "concordance_mot-pivot", 30, ["cat"], ["pos"], true);
    // println "BLOCKS + PNC"
    // println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export32.csv"), "concordance_blocks", 30, ["cat"], ["pos"], true);

    println ExecTimer.stop()
}
tmp/org.txm.tigersearch.rcp/src/org/txm/test/DrawTSSVG.java (revision 713) | ||
---|---|---|
1 |
package org.txm.test; |
|
2 |
|
|
3 |
import java.io.File; |
|
4 |
import java.util.Arrays; |
|
5 |
|
|
6 |
import org.txm.searchengine.ts.*; |
|
7 |
import org.txm.utils.ExecTimer; |
|
8 |
|
|
9 |
public class DrawTSSVG { |
|
10 |
|
|
11 |
String userhome = System.getProperty("user.home"); |
|
12 |
File configdir = new File(userhome,"TXM/corpora/graal/tiger/tigersearch.logprop"); |
|
13 |
File registrydir = new File(userhome, "TXM/corpora/graal/tiger"); |
|
14 |
File svgfile = new File(registrydir, "result.svg"); |
|
15 |
String id = "GRAAL"; |
|
16 |
String query = "#n:[cat = \"Obj\"] >* #m & arity(#n, 2, 10)"; |
|
17 |
//String query = "[]"; |
|
18 |
|
|
19 |
public void test() throws Exception { |
|
20 |
TSCorpusManager manager = new TSCorpusManager(registrydir, configdir); |
|
21 |
|
|
22 |
if (manager.isInitialized()) { |
|
23 |
|
|
24 |
ExecTimer.start(); |
|
25 |
TSCorpus corpus = manager.getCorpus(id); |
|
26 |
|
|
27 |
System.out.println("T features: "+corpus.getTFeatures()); |
|
28 |
System.out.println("NT features: "+corpus.getNTFeatures()); |
|
29 |
|
|
30 |
TSResult result = corpus.query(query); |
|
31 |
|
|
32 |
result.setDisplayProperties(Arrays.asList("word", "pos", "form"), "type"); |
|
33 |
|
|
34 |
TSMatch first = result.getFirst(); |
|
35 |
System.out.println("First: nb of sub graph: "+first.getNumberOfSubGraph()); |
|
36 |
first.firstSubGraph(); |
|
37 |
first.toSVGFile(new File(registrydir,"result_1_1.svg")); |
|
38 |
System.out.println(first.toHTML() + "<br>"); |
|
39 |
first.nextSubGraph(); |
|
40 |
first.toSVGFile(new File(registrydir,"result_1_2.svg")); |
|
41 |
System.out.println(first.toHTML() + "<br>"); |
|
42 |
first.nextSubGraph(); |
|
43 |
first.toSVGFile(new File(registrydir,"result_1_3.svg")); |
|
44 |
System.out.println(first.toHTML() + "<br>"); |
|
45 |
|
|
46 |
// TSMatch next = result.getNext(); |
|
47 |
// System.out.println( "Next: nb of sub graph: "+next.getNumberOfSubGraph()); |
|
48 |
// next.firstSubGraph(); |
|
49 |
// next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_1.svg")); |
|
50 |
// System.out.println( next.toHTML() + "<br>"); |
|
51 |
// next.nextSubGraph(); |
|
52 |
// next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_2.svg")); |
|
53 |
// System.out.println( next.toHTML() + "<br>"); |
|
54 |
// next.nextSubGraph(); |
|
55 |
// next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_3.svg")); |
|
56 |
// System.out.println( next.toHTML() + "<br>"); |
|
57 |
// next.previousSubGraph(); |
|
58 |
// next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_2bis.svg")); |
|
59 |
// System.out.println( next.toHTML() + "<br>"); |
|
60 |
|
|
61 |
// System.out.println( "make some room :)" |
|
62 |
// for(File f : new File("/home/mdecorde/Bureau/tigerexports/").listFiles()) |
|
63 |
// f.delete() |
|
64 |
// |
|
65 |
|
|
66 |
// System.out.println( "SAVE XML" |
|
67 |
// result.toXml(new File("/home/mdecorde/Bureau/tigerexports/result.xml"), false, true) |
|
68 |
// |
|
69 |
// System.out.println( "SAVE AS SVG" |
|
70 |
// for (int i = 0 ; i < result.getNumberOfMatch() && i < 10; i++) { |
|
71 |
// result.getMatch(i).toSVGFile(new File("/home/mdecorde/Bureau/tigerexports/match_"+i+".svg")) |
|
72 |
// } |
|
73 |
// |
|
74 |
System.out.println("SIMPLE NO PNC"); |
|
75 |
System.out.println(result.toConcordance(new File(registrydir, "export1.csv"), "concordance_simple", 30, Arrays.asList("cat"), Arrays.asList("pos"), false)); |
|
76 |
System.out.println("MOT-PIVOT NO PNC"); |
|
77 |
System.out.println(result.toConcordance(new File(registrydir, "export2.csv"), "concordance_mot-pivot", 30, Arrays.asList("cat"), Arrays.asList("pos"), false)); |
|
78 |
System.out.println("BLOCKS NO PNC"); |
|
79 |
System.out.println(result.toConcordance(new File(registrydir, "export3.csv"), "concordance_blocks", 30, Arrays.asList("cat"), Arrays.asList("pos"), false)); |
|
80 |
|
|
81 |
// System.out.println( "SIMPLE + PNC" |
|
82 |
// System.out.println( result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export12.csv"), "concordance_simple", 30, ["cat"], ["pos"], true); |
|
83 |
// System.out.println( "MOT-PIVOT + PNC" |
|
84 |
// System.out.println( result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export22.csv"), "concordance_mot-pivot", 30, ["cat"], ["pos"], true); |
|
85 |
// System.out.println( "BLOCKS + PNC" |
|
86 |
// System.out.println( result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export32.csv"), "concordance_blocks", 30, ["cat"], ["pos"], true); |
|
87 |
System.out.println(ExecTimer.stop()); |
|
88 |
} |
|
89 |
} |
|
90 |
|
|
91 |
public static void main(String[] args) { |
|
92 |
DrawTSSVG d = new DrawTSSVG(); |
|
93 |
try { |
|
94 |
d.test(); |
|
95 |
} catch (Exception e) { |
|
96 |
// TODO Auto-generated catch block |
|
97 |
e.printStackTrace(); |
|
98 |
} |
|
99 |
} |
|
100 |
} |
tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/commands/ComputeTSIndex.java (revision 713) | ||
---|---|---|
38 | 38 |
import org.txm.rcp.editors.TXMResultEditorInput; |
39 | 39 |
import org.txm.searchengine.cqp.corpus.Corpus; |
40 | 40 |
import org.txm.searchengine.cqp.corpus.Partition; |
41 |
import org.txm.test.DrawTSSVG; |
|
41 | 42 |
import org.txm.tigersearch.editors.TIGERSearchEditor; |
42 | 43 |
import org.txm.tigersearch.editors.TIGERSearchEditorInput; |
43 | 44 |
import org.txm.tigersearch.editors.TSIndexEditor; |
... | ... | |
59 | 60 |
@Override |
60 | 61 |
public Object execute(final ExecutionEvent event) throws ExecutionException { |
61 | 62 |
|
62 |
IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event); |
|
63 |
|
|
64 |
Object s = selection.getFirstElement(); |
|
65 |
if (s instanceof Corpus) { |
|
66 |
Corpus corpus = (Corpus)s; |
|
67 |
openEditor(corpus); |
|
68 |
} else if (s instanceof Partition) { |
|
69 |
Partition partition = (Partition)s; |
|
70 |
openEditor(partition); |
|
63 |
// IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event); |
|
64 |
// |
|
65 |
// Object s = selection.getFirstElement(); |
|
66 |
// if (s instanceof Corpus) { |
|
67 |
// Corpus corpus = (Corpus)s; |
|
68 |
// openEditor(corpus); |
|
69 |
// } else if (s instanceof Partition) { |
|
70 |
// Partition partition = (Partition)s; |
|
71 |
// openEditor(partition); |
|
72 |
// } |
|
73 |
|
|
74 |
DrawTSSVG d = new DrawTSSVG(); |
|
75 |
try { |
|
76 |
d.test(); |
|
77 |
} catch (Exception e) { |
|
78 |
// TODO Auto-generated catch block |
|
79 |
e.printStackTrace(); |
|
71 | 80 |
} |
72 | 81 |
return null; |
73 | 82 |
} |
tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/editors/TIGERSearchEditor.java (revision 713) | ||
---|---|---|
10 | 10 |
import org.eclipse.swt.custom.StyledText; |
11 | 11 |
import org.eclipse.swt.events.SelectionEvent; |
12 | 12 |
import org.eclipse.swt.events.SelectionListener; |
13 |
import org.eclipse.swt.graphics.Image; |
|
14 |
import org.eclipse.swt.layout.FormAttachment; |
|
15 |
import org.eclipse.swt.layout.FormData; |
|
16 |
import org.eclipse.swt.layout.FormLayout; |
|
17 | 13 |
import org.eclipse.swt.layout.GridData; |
18 | 14 |
import org.eclipse.swt.layout.GridLayout; |
19 | 15 |
import org.eclipse.swt.widgets.Button; |
... | ... | |
26 | 22 |
import org.eclipse.ui.IEditorInput; |
27 | 23 |
import org.eclipse.ui.IEditorSite; |
28 | 24 |
import org.eclipse.ui.PartInitException; |
29 |
import org.eclipse.ui.part.EditorPart; |
|
30 |
import org.txm.Toolbox; |
|
31 | 25 |
import org.txm.core.preferences.TBXPreferences; |
32 | 26 |
import org.txm.core.preferences.TXMPreferences; |
33 |
import org.txm.core.results.TXMResult; |
|
34 | 27 |
import org.txm.function.tigersearch.TIGERSearch; |
35 |
import org.txm.rcp.IImageKeys; |
|
36 | 28 |
import org.txm.rcp.JobsTimer; |
37 | 29 |
import org.txm.rcp.editors.TXMEditor; |
30 |
import org.txm.rcp.editors.TXMResultEditorInput; |
|
38 | 31 |
import org.txm.rcp.svg.SVGComposite; |
39 | 32 |
import org.txm.rcp.utils.JobHandler; |
40 | 33 |
import org.txm.rcp.views.QueriesView; |
... | ... | |
73 | 66 |
|
74 | 67 |
@Override |
75 | 68 |
public void _createPartControl(Composite parent) { |
76 |
|
|
77 |
Composite mainPanel = new Composite(parent, SWT.NONE); |
|
78 |
mainPanel.setLayout(new FormLayout()); |
|
79 | 69 |
|
80 | 70 |
// System.out.println(parent.getLayout()); |
81 |
Composite queryPanel = new Composite(mainPanel, SWT.NONE); |
|
82 |
Composite paramPanel = new Composite(mainPanel, SWT.NONE); |
|
83 |
svgPanel = new SVGComposite(mainPanel, SWT.EMBEDDED | SWT.NO_BACKGROUND); |
|
84 |
|
|
85 |
FormData qdata = new FormData(); |
|
86 |
qdata.top = new FormAttachment(0); |
|
87 |
qdata.left = new FormAttachment(0); |
|
88 |
qdata.right = new FormAttachment(100); |
|
89 |
qdata.bottom = new FormAttachment(30); |
|
90 |
queryPanel.setLayoutData(qdata); |
|
91 |
|
|
92 |
FormData fdata = new FormData(); |
|
93 |
fdata.top = new FormAttachment(queryPanel); |
|
94 |
fdata.left = new FormAttachment(0); |
|
95 |
fdata.right = new FormAttachment(100); |
|
96 |
paramPanel.setLayoutData(fdata); |
|
97 |
|
|
98 |
fdata = new FormData(); |
|
99 |
fdata.top = new FormAttachment(paramPanel); |
|
100 |
fdata.left = new FormAttachment(0); |
|
101 |
fdata.right = new FormAttachment(100); |
|
102 |
fdata.bottom = new FormAttachment(100); |
|
103 |
svgPanel.setLayoutData(fdata); |
|
104 |
|
|
71 |
Composite queryPanel = this.getCommandParametersGroup(); |
|
72 |
|
|
105 | 73 |
// fill query Area |
106 |
GridLayout qlayout = new GridLayout(1, true);
|
|
74 |
GridLayout qlayout = new GridLayout(11, false);
|
|
107 | 75 |
queryPanel.setLayout(qlayout); |
108 | 76 |
|
109 | 77 |
queryArea = new StyledText(queryPanel, SWT.BORDER | SWT.V_SCROLL | SWT.H_SCROLL); |
110 | 78 |
GridData queryAreaLayoutData = new GridData(GridData.FILL, GridData.FILL, true, true); |
79 |
queryAreaLayoutData.horizontalSpan = 11; |
|
80 |
queryAreaLayoutData.heightHint = 80; |
|
81 |
queryAreaLayoutData.minimumHeight = 80; |
|
111 | 82 |
queryArea.setLayoutData(queryAreaLayoutData); |
112 | 83 |
|
113 | 84 |
// fill param Area |
114 |
GridLayout layout = new GridLayout(11, false); |
|
115 |
paramPanel.setLayout(layout); |
|
116 | 85 |
|
117 | 86 |
GridData gdata = new GridData(SWT.FILL, SWT.CENTER, true, true); |
118 |
new Label(paramPanel, SWT.NONE).setText("Sent ");
|
|
119 |
sentSpinner = new Spinner(paramPanel, SWT.BORDER);
|
|
87 |
new Label(queryPanel, SWT.NONE).setText("Sent ");
|
|
88 |
sentSpinner = new Spinner(queryPanel, SWT.BORDER);
|
|
120 | 89 |
sentSpinner.setMinimum(1); |
121 | 90 |
sentSpinner.setIncrement(1); |
122 | 91 |
sentSpinner.setMaximum(10000000); |
... | ... | |
135 | 104 |
}; |
136 | 105 |
sentSpinner.addSelectionListener(selChangedListener); |
137 | 106 |
|
138 |
sentCounterLabel = new Label(paramPanel, SWT.NONE);
|
|
107 |
sentCounterLabel = new Label(queryPanel, SWT.NONE);
|
|
139 | 108 |
gdata = new GridData(SWT.FILL, SWT.CENTER, true, true); |
140 | 109 |
sentCounterLabel.setLayoutData(gdata); |
141 | 110 |
|
142 |
new Label(paramPanel, SWT.NONE).setText("Sub ");
|
|
143 |
subSpinner = new Spinner(paramPanel, SWT.BORDER);
|
|
111 |
new Label(queryPanel, SWT.NONE).setText("Sub ");
|
|
112 |
subSpinner = new Spinner(queryPanel, SWT.BORDER);
|
|
144 | 113 |
subSpinner.setMinimum(1); |
145 | 114 |
subSpinner.setIncrement(1); |
146 | 115 |
subSpinner.setMaximum(100000000); |
... | ... | |
149 | 118 |
subSpinner.setLayoutData(gdata); |
150 | 119 |
subSpinner.addSelectionListener(selChangedListener); |
151 | 120 |
|
152 |
subCounterLabel = new Label(paramPanel, SWT.NONE);
|
|
121 |
subCounterLabel = new Label(queryPanel, SWT.NONE);
|
|
153 | 122 |
gdata = new GridData(SWT.FILL, SWT.CENTER, true, true); |
154 | 123 |
subCounterLabel.setLayoutData(gdata); |
155 | 124 |
|
156 |
new Label(paramPanel, SWT.NONE).setText("T ");
|
|
157 |
TCombo = new Combo(paramPanel, SWT.READ_ONLY);
|
|
125 |
new Label(queryPanel, SWT.NONE).setText("T ");
|
|
126 |
TCombo = new Combo(queryPanel, SWT.READ_ONLY);
|
|
158 | 127 |
gdata = new GridData(SWT.FILL, SWT.CENTER, true, true); |
159 | 128 |
TCombo.setLayoutData(gdata); |
160 | 129 |
TCombo.addSelectionListener(selChangedListener); |
161 | 130 |
|
162 |
new Label(paramPanel, SWT.NONE).setText("NT ");
|
|
163 |
NTCombo = new Combo(paramPanel, SWT.READ_ONLY);
|
|
131 |
new Label(queryPanel, SWT.NONE).setText("NT ");
|
|
132 |
NTCombo = new Combo(queryPanel, SWT.READ_ONLY);
|
|
164 | 133 |
gdata = new GridData(SWT.FILL, SWT.CENTER, true, true); |
165 | 134 |
NTCombo.setLayoutData(gdata); |
166 | 135 |
NTCombo.addSelectionListener(selChangedListener); |
167 | 136 |
|
168 |
okButton = new Button(paramPanel, SWT.PUSH);
|
|
137 |
okButton = new Button(queryPanel, SWT.PUSH);
|
|
169 | 138 |
okButton.setText("Search"); |
170 | 139 |
okButton.addSelectionListener(new SelectionListener() { |
171 | 140 |
@Override |
... | ... | |
186 | 155 |
} |
187 | 156 |
}); |
188 | 157 |
|
158 |
Composite mainPanel = this.getResultArea(); |
|
159 |
mainPanel.setLayout(new GridLayout(1, false)); |
|
160 |
svgPanel = new SVGComposite(mainPanel, SWT.EMBEDDED | SWT.NO_BACKGROUND); |
|
161 |
svgPanel.setLayoutData(new GridData(SWT.FILL, SWT.FILL, true, true)); |
|
162 |
|
|
189 | 163 |
initializeFields(); |
190 | 164 |
} |
191 | 165 |
|
... | ... | |
370 | 344 |
|
371 | 345 |
@Override |
372 | 346 |
public void setFocus() { |
373 |
queryArea.forceFocus(); |
|
347 |
if (queryArea != null) |
|
348 |
queryArea.forceFocus(); |
|
374 | 349 |
} |
375 | 350 |
|
376 | 351 |
@Override |
... | ... | |
389 | 364 |
} //$NON-NLS-1$ //$NON-NLS-2$ |
390 | 365 |
Log.severe(svgFile.toString()); |
391 | 366 |
|
392 |
TIGERSearchEditorInput ii = (TIGERSearchEditorInput) input; |
|
393 |
source = ii.getSource(); |
|
394 |
ts = ii.getTIGERSearch(); |
|
395 |
if (source instanceof Corpus) { |
|
396 |
corpus = ((Corpus)source); |
|
397 |
} |
|
367 |
TXMResultEditorInput<TIGERSearch> ii = (TXMResultEditorInput<TIGERSearch>) input; |
|
368 |
ts = ii.getResult(); |
|
369 |
corpus = ts.getCorpus(); |
|
398 | 370 |
} |
399 | 371 |
|
400 | 372 |
@Override |
tmp/org.txm.tigersearch.rcp/src/org/txm/export/ts/PunctInject.groovy (revision 713) | ||
---|---|---|
1 |
// Script to restore punctuation etc. to results of TS query. |
|
2 |
// Inputs: |
|
3 |
// --- Tiger-XML Document node |
|
4 |
// --- Java array: |
|
5 |
// --- --- String [index][type] where: |
|
6 |
// --- --- --- type == 0 gives the xml:id |
|
7 |
// --- --- --- type == 1 gives the word form |
|
8 |
// Process: |
|
9 |
// --- Injects punctuation. |
|
10 |
// Returns: |
|
11 |
// --- Tiger-XML Document node. |
|
12 |
package org.txm.export.ts; |
|
13 |
|
|
14 |
import javax.xml.parsers.DocumentBuilderFactory |
|
15 |
import org.txm.searchengine.cqp.CqpDataProxy; |
|
16 |
import org.txm.searchengine.cqp.corpus.Corpus |
|
17 |
import org.txm.searchengine.cqp.corpus.CorpusManager; |
|
18 |
import org.txm.searchengine.cqp.corpus.QueryResult; |
|
19 |
import org.txm.searchengine.cqp.corpus.query.Query |
|
20 |
|
|
21 |
public class PunctInject |
|
22 |
{ |
|
23 |
/**
 * Restores into a Tiger-XML tree the tokens (punctuation etc.) that are listed
 * in the TXM word table but have no <t> node, then moves sentence-initial
 * closing punctuation to the end of the previous <terminals> element.
 *
 * @param tigerXml Tiger-XML node; it is modified in place
 * @param txmIdWordTable mutable list of [id, word form] entries; entries are
 *        removed from it as they are processed
 * @return the tigerXml node that was passed in
 */
public process (def tigerXml, def txmIdWordTable) {

    def allTs = toList(tigerXml.getElementsByTagName('t'))

    if (!allTs) {
        // No <t> node at all: there is no parent under which to attach a word.
        return tigerXml
    }

    def tAttrs = getTAttrNames(allTs[0])
    def tNodeIdPrefix = allTs[0].getAttribute('id').tokenize('#').first()
    def addedWordIds = []

    // allTs is a snapshot: nodes inserted below never shift tIx.
    int tIx = 0

    while (txmIdWordTable) {

        def word = txmIdWordTable.remove(0)

        if (tIx == allTs.size()) {
            // End of TS file, but still words left in the BFM file.
            addTNode(word, allTs.last(), tAttrs, tNodeIdPrefix, 'append')
        }
        else {
            def tNode = allTs[tIx]
            def tId = getTNodeId(tNode)

            if (tId == word[0] && tNode.getAttribute('word') == word[1]) {
                // Same id and same form: nothing to do.
                tIx += 1
            }
            else if (tId == word[0]) {
                println("Mismatched Ids! ($tId)")
                tIx += 1
            }
            else if (['#', '*'].contains(tNode.getAttribute('word')) ) {
                // SRCMF duplicata; try comparing word against the next tNode
                // next time around.
                txmIdWordTable.add(0, word)
                tIx += 1
            }
            // Check that the SRCMF corpus doesn't have a bug in it...
            // subList(0, tIx) is empty when tIx == 0, whereas the range
            // 0..tIx-1 became 0..-1 (the whole list) and made the current
            // node match itself.
            else if ( !(word[1] =~ /[\,\.\?\!\:\;\(\)\[\]\{\}]/)
                    && (allTs.subList(0, tIx).find{
                        it.getAttribute('id') == "$tNodeIdPrefix#$tId"
                    })) {
                println "Warning: word ${tNode.getAttribute('word')}, id $tId appears twice in corpus!"
                txmIdWordTable.add(0, word)
                tIx += 1
            }
            // Check that there's not an extra word in the SRCMF corpus
            // (rare, usu. a tokenisation change)
            else if ( !(word[1] =~ /[\,\.\?\!\:\;\(\)\[\]\{\}]/)
                    && (allTs[tIx..-1].find{
                        it.getAttribute('id') == "$tNodeIdPrefix#${word[0]}"
                    })) {
                println "Warning: word ${tNode.getAttribute('word')}, id $tId does not appear in BFM!"
                txmIdWordTable.add(0, word)
                tIx += 1
            }
            else if (addedWordIds.contains(tId)) {
                println "Warning: word ${tNode.getAttribute('word')}, id ${tId} out of sequence in SRCMF corpus!"
                txmIdWordTable.add(0, word)
                tIx += 1
            }
            else {
                // Insert word. In the first instance, it will have the same
                // parent as the tNode before which it's being inserted.
                addTNode(word, allTs[tIx], tAttrs, tNodeIdPrefix, 'before')
                addedWordIds.add(word[0])
            }
        }
    }

    // Second phase: move punctuation into previous sentence,
    // dependent on sequence.

    def allTerminalses = toList(tigerXml.getElementsByTagName('terminals'))

    // Starts at 1: the first sentence has no previous sentence to move into.
    for (def i = 1 ; i < allTerminalses.size() ; i++) {

        def ts = toList(allTerminalses[i].getElementsByTagName('t'))

        // Collect the leading tokens that contain no letter or digit.
        // [A-Za-z], not [A-z]: the latter also matches the characters
        // [ \ ] ^ _ and the backtick, so bracket tokens were never collected.
        def puncStack = []
        while (ts && !(ts[0].getAttribute('word') =~ /[A-Za-zÀ-ÿ0-9]/)) {
            puncStack.add(ts.remove(0))
        }

        // Now, treat the punctuation stack at the beginning of the sentence
        if ( puncStack ) {

            int moveLeft = 0

            // One character per stacked node, so that an index in puncString
            // is also an index in puncStack. An empty word form contributes a
            // space placeholder instead of failing on ''[0].
            def puncString = puncStack.collect{
                def form = it.getAttribute('word')
                form ? form[0] : ' '
            }.join('')

            // First, identify LAST instance of sentence-final punctuation.
            def matches = puncString =~ /[\.\,\;\:\!\?\)\]\}»”’]/
            if (matches.size() > 0) {
                moveLeft = puncString.lastIndexOf(matches[-1]) + 1
            }

            // Second, split pairs of straight quotes
            matches = puncString =~ /(""|'')/ //"
            if (matches.size() > 0) {
                moveLeft = [moveLeft, puncString.lastIndexOf(matches[-1][0]) + 1].max()
            }

            // Now, move moveLeft punctuation nodes to the end of the prev. sentence
            ts = toList(allTerminalses[i].getElementsByTagName('t'))
            for (def j = 0 ; j < moveLeft ; j++ ) {
                allTerminalses[i - 1].appendChild(ts[j])
            }
        }
    }

    return tigerXml
}
|
200 |
|
|
201 |
/**
 * Creates a new <t> element for a word and attaches it next to an existing
 * <t> node.
 *
 * @param word entry whose [0] is the word id and [1] the word form
 * @param tNode existing node used as anchor
 * @param tAttrs attribute names to set on the new node; every name other than
 *        'id' and 'word' gets the value '--'
 * @param tNodeIdPrefix text placed before '#' in the new node's id
 * @param where 'before' to insert in front of tNode, 'append' to add at the
 *        end of tNode's parent
 * @throws IllegalArgumentException for any other value of where
 */
private addTNode(word, tNode, tAttrs, tNodeIdPrefix, where) {
    def created = tNode.getOwnerDocument().createElement('t')

    tAttrs.each { attrName ->
        switch (attrName) {
            case 'id':
                created.setAttribute('id', "${tNodeIdPrefix}#${word[0]}")
                break
            case 'word':
                created.setAttribute('word', word[1])
                break
            default:
                created.setAttribute(attrName, '--')
        }
    }

    switch (where) {
        case 'before':
            return tNode.getParentNode().insertBefore(created, tNode)
        case 'append':
            return tNode.getParentNode().appendChild(created)
        default:
            throw new IllegalArgumentException('Bad before value')
    }
}
|
246 |
|
|
247 |
/**
 * Lists the attribute names carried by a node.
 *
 * @param tNode node whose attributes are read
 * @return list of the attribute node names, in the order of the attribute map
 */
public getTAttrNames(tNode) {
    def attrs = tNode.attributes
    return (0..<attrs.getLength()).collect { attrs.item(it).nodeName }
}
|
260 |
|
|
261 |
/**
 * Extracts the word id from a node's 'id' attribute.
 *
 * @param tNode node carrying an 'id' attribute
 * @return the last '#'-separated part of the attribute value
 */
public getTNodeId(tNode) {
    def idParts = tNode.getAttribute('id').tokenize('#')
    return idParts.last()
}
|
266 |
|
|
267 |
/**
 * Copies the elements of an iterable object (e.g. a DOM NodeList) into a new
 * collection, keeping every element.
 *
 * @param iterable the object to copy
 * @return the result of findAll with an always-true filter
 */
public def toList(def iterable) {
    final keepEverything = { candidate -> true }
    return iterable.findAll(keepEverything)
}
|
270 |
|
|
271 |
/**
 * Reads the "id" and "word" properties of every position of a corpus and
 * returns the positions whose id starts with "w".
 *
 * @param corpusname name of the corpus to look up in the CorpusManager
 * @param query not used by this method
 * @return list of String[2] entries: [0] is the id value, [1] the word value
 */
public static def getWords(String corpusname, String query)
{
    CorpusManager cm = CorpusManager.getCorpusManager();
    Corpus corpus = cm.getCorpus(corpusname);
    def word_property = corpus.getProperty("word")
    def id_property = corpus.getProperty("id")

    def wordCache = cm.getCorpusProxies(corpus).get(word_property);
    def idCache = cm.getCorpusProxies(corpus).get(id_property);

    // Ask for the size once instead of on every loop iteration.
    int size = corpus.getSize()
    def positions = new int[size];
    for (int i = 0 ; i < size ; i++)
        positions[i] = i;

    def word_values = wordCache.getData(positions)
    def id_values = idCache.getData(positions)

    ArrayList<String[]> words = new ArrayList<String[]>(size);
    for (int p : positions)
    {
        if (id_values[p].startsWith("w"))
        {
            // Build the entry before adding it. The result list is shorter
            // than the corpus as soon as one id is skipped, so indexing it by
            // corpus position (words[p]) hit a missing element.
            words.add([id_values[p], word_values[p]] as String[])
        }
    }

    return words;
}
|
299 |
|
|
300 |
/**
 * Command-line test: parses the Tiger-XML file named by the first argument,
 * runs process() on it with a built-in sample word table and prints the
 * resulting node.
 *
 * @param args args[0] is the path of the Tiger-XML file
 */
public static void main(String[] args)
{
    if (!args) {
        // Fail with a readable message rather than an index error on args[0].
        println 'Usage: PunctInject <tiger-xml-file>'
        return
    }

    def words = [
        ["w203_1", "Dominedeu"],
        ["w203_2", "devemps"],
        ["w203_3", "lauder"],
        ["w203_4", "et"],
        ["w203_5", "a"],
        ["w203_6", "sus"],
        ["w203_7", "sancz"],
        ["w203_8", "honor"],
        ["w203_9", "porter"],
        ["w203_10", "»"],
        ["w203_10.2", ")"],
        ["w203_10.3", '.'],
        ["w203_10.5", '"'],
        ["w203_10.7", '"'],
        ["w203_11", "in"],
        ["w203_12", "su'"],
        ["w203_13", "amor"],
        ["w203_14", "cantomps"],
        ["w203_15", "del"],
        ["w203_16", "·sanz"],
        ["w203_17", "quae"],
        ["w203_18", "por"],
        ["w203_19", "lui"],
        ["w203_20", "augrent"],
        ["w203_21", "granz"],
        ["w203_22", "aanz"],
        ["w203_23", "."],
    ];

    File tigerXml = new File(args[0]);
    def factory = DocumentBuilderFactory.newInstance()
    factory.setXIncludeAware(true)
    def builder = factory.newDocumentBuilder()
    def THEDOM = builder.parse(tigerXml).documentElement

    println THEDOM.getClass()

    def NEWDOM = new PunctInject().process(THEDOM, words);

    println NEWDOM

    // Disabled: write the result to a file.
    // File outfile = new File("outfile.xml")
    // println outfile
    //outfile.withWriter("iso-8859-1"){writer ->
    //writer.write(NEWDOM.toString())
    // }
}
|
349 |
} |
tmp/org.txm.tigersearch.rcp/src/org/txm/export/ts/MatchInject.groovy (revision 713) | ||
---|---|---|
1 |
#! /usr/bin/groovy |
|
2 |
package org.txm.export.ts; |
|
3 |
|
|
4 |
import org.xml.sax.XMLReader; |
|
5 |
import org.xml.sax.helpers.XMLReaderFactory; |
|
6 |
import groovy.util.slurpersupport.NodeChild |
|
7 |
|
|
8 |
/* |
|
9 |
* The script takes the <matches/> elements from file "Tiger_match.xml" |
|
10 |
* and inserts them at the end of the <s/> element bearing the same ID. |
|
11 |
* Inputs: three file names |
|
12 |
* - TsInputName --- the TS file WITHOUT matches |
|
13 |
* - MatchInputName --- the TS file containing only matches. |
|
14 |
* - OutputFileName ---required output file. |
|
15 |
* To pass these arguments from within an application, call script() directly. |
|
16 |
*/ |
|
17 |
|
|
18 |
// Filename variables |
|
19 |
// def TsInputName = '/home/tomr/Documents/Work/lyon12/srcmf/groovy/MatchInject/ts_input.xml' |
|
20 |
// def MatchInputName = '/home/tomr/Documents/Work/lyon12/srcmf/groovy/MatchInject/match_input.xml' |
|
21 |
// def OutputFileName = '/home/tomr/Documents/Work/lyon12/srcmf/groovy/MatchInject/test.xml' |
|
22 |
|
|
23 |
// Command-line entry point: exactly three arguments are required
// (TS input file, match input file, output file); anything else prints
// the usage message instead of running the script.
if (args?.size() != 3) {
	println '''Incorrect number of arguments: three strings required.

USAGE:
******
groovy MatchInject.groovy TsInput.xml MatchInput.xml OutputFile.xml'''
} else {
	script(args[0], args[1], args[2])
}
|
32 |
|
|
33 |
/**
 * Convenience overload taking file names instead of File objects.
 *
 * @param tsInputName    path of the TS file without matches
 * @param matchInputName path of the TS file containing only matches
 * @param outputFileName path of the file to write
 */
def script(String tsInputName, String matchInputName, String outputFileName) {
	// Each name becomes its own File. The previous code nested the output
	// File inside the match File constructor, which has no (String, File)
	// form and left script() with only two arguments.
	script(new File(tsInputName), new File(matchInputName), new File(outputFileName));
}
|
36 |
|
|
37 |
// The script.
/**
 * Copies every <s> element of the TS input into a new <corpus id="TSOut">
 * document and, for each sentence, appends the <matches> content of the
 * sentence with the same id found in the match input.
 *
 * @param tsInputFile    TS file without matches
 * @param matchInputFile TS file containing only matches
 * @param outputFile     file to write; it is written as UTF-8
 */
def script(File tsInputFile, File matchInputFile, File outputFile) {
	def TsInput = new XmlSlurper().parse(tsInputFile)

	// The match file is read through an explicitly created SAX reader with
	// namespace processing switched off.
	// NOTE(review): this sets a JVM-wide system property — confirm that no
	// other parser in the application is affected.
	System.setProperty("org.xml.sax.driver", "com.sun.org.apache.xerces.internal.parsers.SAXParser");
	def xmlReader = XMLReaderFactory.createXMLReader();
	xmlReader.setFeature('http://xml.org/sax/features/namespaces', false)
	XmlSlurper mslurper = new XmlSlurper(xmlReader);
	def MatchInput = mslurper.parse(matchInputFile)

	def inputSentences = TsInput.'**'.findAll {it.name() == 's'};
	def matcheSentences = MatchInput.'**'.findAll { it.name() == 's' }

	// Index the match sentences by id once instead of scanning the whole
	// list for every input sentence. The first sentence seen for an id
	// wins, which is what a linear find() would return.
	def matchesById = [:]
	for (def matchSentence : matcheSentences) {
		String sentenceId = matchSentence.'@id'.toString()
		if (!matchesById.containsKey(sentenceId)) {
			matchesById[sentenceId] = matchSentence
		}
	}

	def markup = {
		mkp.xmlDeclaration()

		corpus(id:'TSOut') {
			body {
				inputSentences.each { sPath ->
					s(id:"${sPath.'@id'}") {
						mkp.yield(sPath.graph)
						def sMatches = matchesById[sPath.'@id'.toString()]

						if (sMatches) {
							mkp.yield(sMatches.matches)
						}
					}
				}
			}
		}
	}
	def processor = new groovy.xml.StreamingMarkupBuilder().bind(markup)
	// The serialized document declares encoding="UTF-8", so the bytes must
	// be written as UTF-8 whatever the platform default charset is.
	outputFile.withWriter('UTF-8') { it << groovy.xml.XmlUtil.serialize(processor)}
}
tmp/org.txm.tigersearch.rcp/src/org/txm/export/ts/package.html (revision 713) | ||
---|---|---|
1 |
<html> |
|
2 |
<body> |
|
3 |
<p>Contains scripts to build the TIGERSearch concordances</p> |
|
4 |
</body> |
|
5 |
</html> |
|
0 | 6 |
tmp/org.txm.tigersearch.rcp/src/org/txm/export/ts/ConcordanceBlocks.groovy (revision 713) | ||
---|---|---|
1 |
package org.txm.export.ts; |
|
2 |
|
|
3 |
import javax.xml.parsers.DocumentBuilderFactory |
|
4 |
|
|
5 |
class ConcordanceBlocks { |
|
6 |
def codec = 'UTF-8' |
|
7 |
|
|
8 |
int cx = 30; |
|
9 |
def ntTypes = []; |
|
10 |
def tTypes = []; |
|
11 |
File xmlfile, outfile; |
|
12 |
|
|
13 |
/**
 * Reads a TIGERSearch XML result file and writes one tab-separated
 * concordance line per <match> element to outfile, preceded by a header
 * line. Progress messages are printed to standard output.
 *
 * The arguments are also stored in the corresponding instance fields.
 *
 * @param xmlfile  XML file to read (parsed with XInclude enabled)
 * @param outfile  file to write, encoded with the codec field
 * @param cx       context size used for the out-of-sentence context windows
 * @param ntTypes  attribute names read as "type" values on non-terminal nodes
 * @param tTypes   attribute names read as "type" values on terminal nodes
 * @return true once all rows have been written
 */
public boolean process(File xmlfile, File outfile, int cx, def ntTypes, def tTypes) {
	this.cx = cx;
	this.xmlfile = xmlfile;
	this.outfile = outfile;
	this.ntTypes = ntTypes;
	this.tTypes = tTypes;

	Writer writer = outfile.newPrintWriter(codec)
	try {
		// Number of "type" columns written for the pivot and for each block.
		int nTypes = [ntTypes.size(), tTypes.size()].max()

		println 'Reading XML File'
		def factory = DocumentBuilderFactory.newInstance()
		factory.setXIncludeAware(true)
		def builder = factory.newDocumentBuilder()
		def records = builder.parse(xmlfile).documentElement
		println 'done.'

		println 'done. Pre-treating file to combine results with the same pivot...'
		records = matchCombine(records)
		println 'done.'

		println 'Calculating max. number of blocks...'
		def nBlock = 0
		def allMatches = toList(records.getElementsByTagName('match'))
		for (def match : allMatches) {
			nBlock = [
				nBlock,
				toList(match.getElementsByTagName('variable')).findAll{
					it.getAttribute('name').startsWith('#block')
				}.size()
			].max()
		}
		println "done ( $nBlock )"

		// Table headers. Column names are stored as plain Strings so that
		// they can be used directly as map keys further down.
		println 'Writing concordance...'
		def header = ['sId', 'LeftCxOutsideSnt', 'LeftCxInsideSnt']

		for (int b = nBlock ; b > 0 ; b--) {
			header.add("${b}BlockBeforePivot".toString())
			for (int j = 0 ; j < nTypes ; j++) {
				header.add("${b}BlockBeforePivotType${j+1}".toString())
			}
		}

		header.add('Pivot')
		for (int j = 0 ; j < nTypes ; j++) {
			header.add("PivotType${j+1}".toString())
		}

		for (int b = 1 ; b <= nBlock ; b++) {
			header.add("${b}BlockAfterPivot".toString())
			for (int j = 0 ; j < nTypes ; j++) {
				header.add("${b}BlockAfterPivotType${j+1}".toString())
			}
		}

		header.addAll(['RightCxInsideSnt', 'RightCxOutsideSnt', 'Warnings'])

		writer.write(header.join("\t")+"\n");

		def allTerminals = toList(records.getElementsByTagName('t'))

		// Joins the words of allTerminals[from..<to], adding a '/' after a
		// terminal that is the last <t> under its parent element.
		def outsideContext = { int from, int to ->
			def words = []
			if (from < to) {
				for (def tNode : allTerminals.subList(from, to)) {
					words.add(tNode.getAttribute('word'))
					if (toList(tNode.parentNode.getElementsByTagName('t'))[-1] == tNode)
						words.add('/')
				}
			}
			return words.join(" ")
		}

		int tenPercentile = 0
		int matchCount = 0;
		for (def aMatchNode : allMatches) {
			if ((int)(((float)++matchCount / allMatches.size()) * 10) > tenPercentile)
				println ""+(++tenPercentile * 10)+ ' percent complete...'

			def (rowDict, inSntCxLengthLeft, inSntCxLengthRight) = match2CSVrow(aMatchNode, header);

			// Add out-of-sentence context
			def sNode = aMatchNode.parentNode.parentNode; // sentence
			def terminals = toList(sNode.getElementsByTagName('t'));
			def firstTInSIx = allTerminals.indexOf(terminals[0])  // position of the first terminal in the text
			def lastTInSIx = allTerminals.indexOf(terminals[-1])  // position of the last terminal in the text

			// Left context
			int start = Math.max(firstTInSIx - cx + inSntCxLengthLeft, 0);
			int end = firstTInSIx;
			rowDict['LeftCxOutsideSnt'] = outsideContext(start, end)

			// Right context
			// NOTE(review): this window comes out up to two words shorter than
			// the left one (inSntCxLengthRight is one more than the number of
			// words in RightCxInsideSnt, and the end bound is lastTInSIx + cx
			// rather than lastTInSIx + 1 + cx) — confirm whether that is
			// intended before changing it.
			start = lastTInSIx + 1;
			end = Math.min(allTerminals.size(), lastTInSIx + cx - inSntCxLengthRight)
			rowDict["RightCxOutsideSnt"] = outsideContext(start, end)

			// Write rowDict as one tab-separated line.
			// Keys created by string interpolation are GStrings, and a GString
			// never equals (or hashes like) the String with the same text, so
			// every key is normalized to String before the lookup. Without
			// this, interpolated columns would always fall back to "--".
			def row = [:]
			rowDict.each { key, value -> row[key.toString()] = value }

			def cells = header.collect { columnName ->
				def val = row[columnName.toString()]
				val == null ? "--" : val.toString()
			}
			writer.write(cells.join("\t")+"\n")
			writer.flush()
		}
	} finally {
		// Release the output file even when parsing or row building fails.
		writer.close();
	}
	return true;
}
|
155 |
|
|
156 |
/**
 * Builds the concordance row of one <match> element.
 *
 * The '#pivot' and '#block…' variables of the match are resolved to the
 * nodes they reference, sorted by the sentence position of their first
 * terminal, and written to the row as the pivot, the blocks before it
 * (numbered outwards from the pivot) and the blocks after it. Words lying
 * between two consecutive blocks are attached in braces; the remaining
 * words of the sentence become the in-sentence left and right context.
 *
 * All row keys are plain Strings.
 *
 * @param aMatchNode the <match> DOM element
 * @param header     the column names (not read by this method)
 * @return a three-element list: the row map, the number of words in the
 *         in-sentence left context, and the value
 *         tNodesInSentence.size() - (index of the last block terminal)
 */
private def match2CSVrow(def aMatchNode, def header) {
	// Step 1: build a list of maps describing the variables:
	//   name      - variable name without the leading '#'
	//   parent    - the node the variable points to
	//   terminals - the terminal nodes below that node
	//   lexform   - the printable word form of the node

	def sNode = aMatchNode.parentNode.parentNode // get the sentence node
	def tNodesInSentence = toList(sNode.getElementsByTagName('t')) // all terminal nodes of the sentence
	int nTypes = [ntTypes.size(), tTypes.size()].max()

	def varDetails = []

	for (def aVariable : aMatchNode.getElementsByTagName('variable')) {
		if (aVariable.getAttribute('name') == '#pivot' ||
				aVariable.getAttribute('name').startsWith('#block')) {
			def varParent = idKey(sNode, aVariable.getAttribute('idref'))

			varDetails.add([
				'name' : aVariable.getAttribute('name').substring(1),
				'parent': varParent,
				'terminals': toList(getTNodes(varParent)),
				'lexform': writeLexForm(varParent),
			])

			// Add types. The first attribute name of each list decides
			// which list applies: if the node carries ntTypes[0] the
			// ntTypes attributes are read, otherwise tTypes is tried.
			for (int j = 0 ; j < nTypes ; j++) {
				def a = ''
				if (j < ntTypes.size() && varParent.getAttribute(ntTypes[0])) {
					a = varParent.getAttribute(ntTypes[j])
				} else if (j < tTypes.size() && varParent.getAttribute(tTypes[0])) {
					a = varParent.getAttribute(tTypes[j])
				}
				varDetails[-1]["type${j+1}".toString()] = a
			}

			// sentence positions of the terminal nodes used
			varDetails[-1]['terminalsIx'] = varDetails[-1]['terminals'].collect{tNodesInSentence.indexOf(it)}
		}
	}

	// Sort varDetails by the position of the first word
	varDetails = varDetails.sort{it['terminalsIx'].min()}

	// NOTE(review): findIndexOf yields -1 when the match has no '#pivot'
	// variable, and varDetails[-1] is then the last block — confirm that
	// every match reaching this method has a pivot.
	def pivotBlockPosition = varDetails.findIndexOf{it['name'] == 'pivot'}

	// Write the table
	def rowDict = [:]
	rowDict['sId'] = sNode.getAttribute('id')

	// write the pivot
	rowDict['Pivot'] = varDetails[pivotBlockPosition]['lexform']
	for (int j = 0 ; j < nTypes ; j++) {
		rowDict["PivotType${j+1}".toString()] = varDetails[pivotBlockPosition]["type${j+1}".toString()]
	}

	def startNextId = (varDetails[pivotBlockPosition]['terminalsIx']).min()
	def endPrevId = (varDetails[pivotBlockPosition]['terminalsIx']).max()

	// Write the Pre-pivot blocks, walking leftwards from the pivot
	def i = 0
	if (pivotBlockPosition > 0) {
		for (int j = pivotBlockPosition - 1 ; j >= 0 ; j--) {
			i++;
			def block = varDetails.get(j)
			String beforeKey = "${i}BlockBeforePivot".toString()

			rowDict[beforeKey] = block['lexform']
			for (int k = 0 ; k < nTypes ; k++) {
				rowDict["${i}BlockBeforePivotType${k+1}".toString()] = block["type${k+1}".toString()]
			}

			// Add any intervening words to the right edge.
			// As for the post-pivot blocks this isn't always possible: when
			// no terminal of the block lies left of the following block
			// there is no right edge, and '{?}' is written instead of
			// failing on a null edge.
			def rightEdgeList = block['terminalsIx'].findAll{ it < startNextId }
			if (rightEdgeList.size() > 0) {
				def rightEdge = rightEdgeList.max()
				for (int ix = rightEdge + 1 ; ix < startNextId ; ix++) { // add words to reach the next block
					rowDict[beforeKey] += ' {' + tNodesInSentence[ix].getAttribute('word') + '}';
				}
			} else {
				rowDict[beforeKey] += ' {?}'
			}

			startNextId = (block['terminalsIx']).min()
		}
	}

	// Write the in-sentence left context
	def lexs = tNodesInSentence.subList(0, startNextId).collect {it.getAttribute('word')}
	rowDict['LeftCxInsideSnt'] = lexs.join(" ")

	def inSntCxLengthLeft = startNextId

	// Write the Post-pivot blocks
	i = 0
	for (def block in varDetails.subList(pivotBlockPosition + 1, varDetails.size())) {
		i++
		String afterKey = "${i}BlockAfterPivot".toString()

		rowDict[afterKey] = block['lexform']
		for (int j = 0 ; j < nTypes ; j++) {
			rowDict["${i}BlockAfterPivotType${j+1}".toString()] = block["type${j+1}".toString()]
		}

		// Add any intervening words to the left edge.
		// Note that this isn't always possible, in particular
		// if the preceding element is discontinuous.
		def leftEdgeList = block['terminalsIx'].findAll{it > endPrevId }
		if (leftEdgeList.size() > 0) {
			def leftEdge = leftEdgeList.min()
			for (int ix = leftEdge - 1 ; ix > endPrevId ; ix--) {
				rowDict[afterKey] = '{' + tNodesInSentence[ix].getAttribute('word') + '} ' + rowDict[afterKey]
			}
		}
		else {
			rowDict[afterKey] = '{?} ' + rowDict[afterKey]
		}
		endPrevId = block['terminalsIx'].max()
	}

	// Write the in-sentence right-context
	lexs = tNodesInSentence.subList(endPrevId + 1, tNodesInSentence.size()).collect {it.getAttribute('word')}
	rowDict['RightCxInsideSnt'] = lexs.join(" ")

	// NOTE(review): this is one more than the number of words placed in
	// RightCxInsideSnt — confirm the intended meaning with the caller.
	def inSntCxLengthRight = tNodesInSentence.size() - endPrevId

	return [rowDict, inSntCxLengthLeft, inSntCxLengthRight]
}
|
303 |
|
|
304 |
/**
 * Copies the elements of an iterable object (for instance a DOM NodeList)
 * into a new collection by filtering it with an always-true predicate.
 *
 * @param iterable the object to copy
 * @return the result of findAll with a predicate accepting every element
 */
public def toList(def iterable) {
	def acceptAll = { element -> true }
	return iterable.findAll(acceptAll)
}
|
307 |
|
|
308 |
/**
 * Looks up an element by its "id" attribute below a node.
 *
 * @param sNode the element whose descendants are searched
 * @param anId  the id value to look for
 * @return the first descendant element of sNode, in the order returned by
 *         getElementsByTagName("*"), whose "id" attribute equals anId, or
 *         null when there is none
 */
public def idKey(def sNode, String anId) {
	for (def candidate : sNode.getElementsByTagName("*")) {
		if (candidate.getAttribute("id") == anId) {
			return candidate
		}
	}
	return null
}
|
314 |
|
|
315 |
/**
 * Collects the nodes reachable from theNode that have no <edge>
 * descendants. A node that does contain <edge> elements is replaced by
 * the nodes its edges reference (through their "idref" attribute), and
 * those are examined in turn until no node is left to examine.
 *
 * Referenced nodes are resolved with idKey() below
 * theNode.parentNode.parentNode.
 *
 * @param theNode the node to expand
 * @return the list of edge-less nodes found
 */
def getTNodes(theNode) {
	def found = []
	def pending = [theNode]

	while (!pending.isEmpty()) {
		def current = pending.pop()
		def outgoing = toList(current.getElementsByTagName('edge'))
		if (outgoing.isEmpty()) {
			// no edges: this node is kept as a terminal
			found.add(current)
		} else {
			// queue every node referenced by an edge
			outgoing.each { edge ->
				pending.add(idKey(theNode.parentNode.parentNode, edge.getAttribute('idref')))
			}
		}
	}
	return found;
}
|
334 |
|
|
335 |
/**
 * Builds the printable word form of a node.
 *
 * All <t> elements below theNode.parentNode.parentNode are visited in
 * order. The "word" of each one that belongs to theNode (as reported by
 * getTNodes) is output as is; words lying between two words of the node
 * are output in square brackets. Words before the first or after the
 * last word of the node are left out.
 *
 * @param theNode the node a variable points to
 * @return the words joined with single spaces
 */
def writeLexForm(theNode) {
	def container = theNode.parentNode.parentNode
	def ownTerminals = getTNodes(theNode)

	def output = []
	def pendingGap = []   // bracketed words seen since the last own word
	def started = false

	for (def candidate : container.getElementsByTagName('t')) {
		if (ownTerminals.contains(candidate)) {
			started = true
			// the gap is only emitted once another own word follows it
			output.addAll(pendingGap)
			output.add(candidate.getAttribute('word'))
			pendingGap = []
		} else if (started) {
			pendingGap.add('[' + candidate.getAttribute('word') + ']')
		}
	}
	return output.join(" ");
}
|
359 |
|
|
360 |
/**
 * Merges, inside every <matches> element, the matches that share the same
 * pivot.
 *
 * For each '#pivot' variable whose "idref" was already seen on an earlier
 * pivot, all variables of its match are moved into the match of that
 * earlier pivot; duplicate variables are then removed from the receiving
 * match. Two variables count as duplicates when they have the same
 * "idref" and either the same name or names that both start with
 * '#block'. Finally every <match> left without variables is removed.
 *
 * The DOM is modified in place.
 *
 * @param theDOM the element containing the <matches> elements
 * @return theDOM
 */
def matchCombine(theDOM) {
	for (def aMatchesNode : theDOM.getElementsByTagName('matches')) {

		// Snapshot of the pivot variables, taken before any node is moved.
		def pivotVariables = toList(aMatchesNode.getElementsByTagName('variable'))
				.findAll { it.getAttribute('name') == '#pivot' }
		def keptPivots = []

		for (def pivot : pivotVariables) {
			def earlierPivot = keptPivots.find {
				it.getAttribute('idref') == pivot.getAttribute('idref')
			}

			if (earlierPivot == null) {
				// not the same pivot
				keptPivots.add(pivot)
				continue
			}

			// duplicate pivot; move all variables into the earlier match
			def targetMatch = earlierPivot.getParentNode()
			for (def variable : toList(pivot.getParentNode().getElementsByTagName('variable'))) {
				targetMatch.appendChild(variable)
			}

			// remove duplicates, keeping the first occurrence
			def keptVariables = []
			for (def variable : toList(targetMatch.getElementsByTagName('variable'))) {
				boolean duplicate = keptVariables.any { kept ->
					kept.getAttribute('idref') == variable.getAttribute('idref') &&
							(kept.getAttribute('name') == variable.getAttribute('name') ||
							(kept.getAttribute('name').startsWith('#block') &&
							variable.getAttribute('name').startsWith('#block')))
				}
				if (duplicate) {
					variable.getParentNode().removeChild(variable)
				} else {
					keptVariables.add(variable)
				}
			}
		}

		// Tidying up: remove empty <match /> nodes
		for (def matchNode : toList(aMatchesNode.getElementsByTagName('match'))) {
			if (!toList(matchNode.getElementsByTagName('variable'))) {
				aMatchesNode.removeChild(matchNode)
			}
		}
	}

	return theDOM
}
|
455 |
|
|
456 |
/**
 * Prints the command-line help, one line at a time, to standard output.
 */
public static void usage() {
	def helpLines = [
		'concordance_blocks [OPTIONS] inputfile.xml [outputfile.csv]',
		'OPTIONS',
		'-h, --help Displays this message.',
		'-c --context [length] Sets the number of words in context.'
	]
	for (String helpLine : helpLines) {
		println helpLine
	}
}
|
462 |
|
|
463 |
/**
 * Command-line entry point: builds the concordance of args[0] into
 * args[1] with a context of 30 words, reading the 'cat' attribute of
 * non-terminals and the 'pos' attribute of terminals.
 *
 * When fewer than two arguments are given the usage message is printed
 * and nothing else is done.
 *
 * NOTE(review): the usage text shows the output file as optional and
 * lists -h/-c options, but no default output and no option parsing are
 * implemented here — confirm which of the two should change.
 *
 * @param args input XML file, output file
 */
static main(args) {
	if (args == null || args.length < 2) {
		// Stop here: going on would fail on the missing file arguments.
		usage()
		return
	}
	int cx = 30;
	def ntTypes = ['cat'];
	def tTypes = ['pos'];
	File xmlfile = new File(args[0])
	File outfile = new File(args[1])
	ConcordanceBlocks p = new ConcordanceBlocks();
	println "START"
	p.process(xmlfile, outfile, cx, ntTypes, tTypes)
	println "END"
}
|
476 |
} |
tmp/org.txm.tigersearch.rcp/src/org/txm/function/tigersearch/TIGERSearch.java (revision 713) | ||
---|---|---|
22 | 22 |
|
23 | 23 |
public class TIGERSearch extends TXMResult implements IAdaptable { |
24 | 24 |
|
25 |
TSCorpusManager manager; |
|
26 |
TSCorpus tscorpus; |
|
27 |
boolean ready = false; |
|
28 |
private TSResult tsresult;
|
|
29 |
String T, NT; |
|
30 |
private MainCorpus corpus;
|
|
31 |
private String query;
|
|
25 |
protected TSCorpusManager manager;
|
|
26 |
protected TSCorpus tscorpus;
|
|
27 |
protected boolean ready = false;
|
|
28 |
protected TSResult tsresult;
|
|
29 |
protected String T, NT;
|
|
30 |
protected MainCorpus corpus;
|
|
31 |
protected String query;
|
|
32 | 32 |
|
33 |
public MainCorpus getCorpus() { |
|
34 |
return corpus; |
|
35 |
} |
|
36 |
|
|
33 | 37 |
public TIGERSearch(Corpus corpus) { |
34 | 38 |
super(corpus); |
35 | 39 |
this.corpus = corpus.getMainCorpus(); |
... | ... | |
166 | 170 |
@Override |
167 | 171 |
public String getLabel(Object o) { |
168 | 172 |
String q = ((TIGERSearch) o).getQuery(); |
169 |
return q.substring(0, Math.min(10, q.length())).replaceAll("\n", "")+"..."; |
|
173 |
if (q != null) { |
|
174 |
return q.substring(0, Math.min(10, q.length())).replaceAll("\n", "")+"..."; |
|
175 |
} else { |
|
176 |
return ((TIGERSearch) o).getCorpus().getName(); |
|
177 |
} |
|
170 | 178 |
} |
171 | 179 |
|
172 | 180 |
@Override |
... | ... | |
243 | 251 |
|
244 | 252 |
@Override |
245 | 253 |
public boolean canCompute() { |
246 |
return corpus != null && query.length() > 0; |
|
254 |
return corpus != null && query != null && query.length() > 0; |
Formats disponibles : Unified diff