Revision 713

tmp/org.txm.tigersearch.rcp/.classpath (revision 713)
15 15
			<accessrule kind="accessible" pattern="**"/>
16 16
		</accessrules>
17 17
	</classpathentry>
18
	<classpathentry exported="true" kind="con" path="GROOVY_SUPPORT"/>
19
	<classpathentry exported="true" kind="con" path="GROOVY_DSL_SUPPORT"/>
18 20
	<classpathentry kind="output" path="bin"/>
19 21
</classpath>
tmp/org.txm.tigersearch.rcp/META-INF/MANIFEST.MF (revision 713)
323 323
 org.mozilla.javascript.tools.shell,
324 324
 org.relaxng.datatype,
325 325
 org.relaxng.datatype.helpers,
326
 org.txm.export.ts,
326 327
 org.txm.function.tigersearch,
327 328
 org.txm.importer.srcmf,
328 329
 org.txm.importer.srcmf2,
......
343 344
 tigersearch4txm.handlers
344 345
Import-Package: ims.tiger.gui.tigergraphviewer.forest
345 346
Bundle-Vendor: Textometrie.org
347
Bundle-ClassPath: lib/dom4j-1.6.1.jar,
348
 lib/log4j-1.2.12.jar,
349
 lib/TigerSearch.jar,
350
 .
tmp/org.txm.tigersearch.rcp/.project (revision 713)
22 22
		</buildCommand>
23 23
	</buildSpec>
24 24
	<natures>
25
		<nature>org.eclipse.jdt.groovy.core.groovyNature</nature>
25 26
		<nature>org.eclipse.pde.PluginNature</nature>
26 27
		<nature>org.eclipse.jdt.core.javanature</nature>
27 28
	</natures>
tmp/org.txm.tigersearch.rcp/src/org/txm/test/DrawTSSVG.groovy (revision 713)
1
/**
2
 * Main.
3
 *
4
 * @param args the args
5
 */
6
// Copyright © 2010-2013 ENS de Lyon.
7
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
8
// Lyon 2, University of Franche-Comté, University of Nice
9
// Sophia Antipolis, University of Paris 3.
10
//
11
// The TXM platform is free software: you can redistribute it
12
// and/or modify it under the terms of the GNU General Public
13
// License as published by the Free Software Foundation,
14
// either version 2 of the License, or (at your option) any
15
// later version.
16
//
17
// The TXM platform is distributed in the hope that it will be
18
// useful, but WITHOUT ANY WARRANTY; without even the implied
19
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
20
// PURPOSE. See the GNU General Public License for more
21
// details.
22
//
23
// You should have received a copy of the GNU General
24
// Public License along with the TXM platform. If not, see
25
// http://www.gnu.org/licenses.
26
//
27
//
28
//
29
// $LastChangedDate:$
30
// $LastChangedRevision:$
31
// $LastChangedBy:$
32
//
33
package org.txm.test
34

  
35
import org.txm.Toolbox
36
import org.txm.searchengine.ts.*
37
import org.txm.utils.ExecTimer
38

  
39

  
40
// All locations used by this manual test are resolved from the current user's home directory.
String userhome = System.getProperty("user.home")
41
// Derive the TXM home from user.home instead of hard-coding one developer's account,
// so the script also runs on other machines (same value as before for that account).
Toolbox.setParam(Toolbox.USER_TXM_HOME, new File(userhome, "TXM").getPath() + File.separator)
42
File configdir = new File(userhome,"TXM/Tiger/tigersearch.logprop")
43
File registrydir = new File(userhome, "TXM/Tiger/corpora/")
44
File svgfile = new File(userhome,"TXM/Tiger/result.svg")
45
// Identifier of the corpus to open and the TIGERSearch query to run against it
String id = "GRAAL"
46
String query = """#n:[cat = "Obj"] >* #m & arity(#n, 2, 10)"""
47
//String query = "[]"
48

  
49
TSCorpusManager manager = new TSCorpusManager(registrydir, configdir)
50
if(manager.isInitialized()) {
51

  
52
	ExecTimer.start()
53
	TSCorpus corpus = manager.getCorpus(id);
54

  
55
	println "T features: "+corpus.getTFeatures()
56
	println "NT features: "+corpus.getNTFeatures()
57

  
58
	TSResult result = corpus.query(query);
59

  
60
	result.setDisplayProperties(["word", "pos", "form"], "type");
61

  
62
	TSMatch first = result.getFirst();
63
	println "First: nb of sub graph: "+first.getNumberOfSubGraph();
64
	first.firstSubGraph();
65
	first.toSVGFile(new File(userhome,"TXM/Tiger/result_1_1.svg"));
66
	println first.toHTML() + "<br>"
67
	first.nextSubGraph()
68
	first.toSVGFile(new File(userhome,"TXM/Tiger/result_1_2.svg"));
69
	println first.toHTML() + "<br>"
70
	first.nextSubGraph()
71
	first.toSVGFile(new File(userhome,"TXM/Tiger/result_1_3.svg"));
72
	println first.toHTML() + "<br>"
73

  
74
	TSMatch next = result.getNext();
75
	println "Next: nb of sub graph: "+next.getNumberOfSubGraph();
76
	next.firstSubGraph();
77
	next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_1.svg"));
78
	println next.toHTML() + "<br>"
79
	next.nextSubGraph()
80
	next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_2.svg"));
81
	println next.toHTML() + "<br>"
82
	next.nextSubGraph()
83
	next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_3.svg"));
84
	println next.toHTML() + "<br>"
85
	next.previousSubGraph()
86
	next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_2bis.svg"));
87
	println next.toHTML() + "<br>"
88

  
89

  
90

  
91
	//	println "make some room :)"
92
	//	for(File f : new File("/home/mdecorde/Bureau/tigerexports/").listFiles())
93
	//		f.delete()
94
	//
95

  
96
	//	println "SAVE XML"
97
	//	result.toXml(new File("/home/mdecorde/Bureau/tigerexports/result.xml"), false, true)
98
	//
99
	//	println "SAVE AS SVG"
100
	//	for (int i = 0 ; i < result.getNumberOfMatch() && i < 10; i++) {
101
	//		result.getMatch(i).toSVGFile(new File("/home/mdecorde/Bureau/tigerexports/match_"+i+".svg"))
102
	//	}
103
	//
104
	//	println "SIMPLE NO PNC"
105
	//	println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export1.csv"), "concordance_simple", 30, ["cat"], ["pos"], false);
106
	//	println "MOT-PIVOT NO PNC"
107
	//	println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export2.csv"), "concordance_mot-pivot", 30, ["cat"], ["pos"], false);
108
	//	println "BLOCKS NO PNC"
109
	//	println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export3.csv"), "concordance_blocks", 30, ["cat"], ["pos"], false);
110
	//
111
	//	println "SIMPLE + PNC"
112
	//	println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export12.csv"), "concordance_simple", 30, ["cat"], ["pos"], true);
113
	//	println "MOT-PIVOT + PNC"
114
	//	println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export22.csv"), "concordance_mot-pivot", 30, ["cat"], ["pos"], true);
115
	//	println "BLOCKS + PNC"
116
	//	println result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export32.csv"), "concordance_blocks", 30, ["cat"], ["pos"], true);
117
	println ExecTimer.stop()
118
}
tmp/org.txm.tigersearch.rcp/src/org/txm/test/DrawTSSVG.java (revision 713)
1
package org.txm.test;
2

  
3
import java.io.File;
4
import java.util.Arrays;
5

  
6
import org.txm.searchengine.ts.*;
7
import org.txm.utils.ExecTimer;
8

  
9
/**
 * Manual smoke test for the TIGERSearch engine wrappers: runs one query on one
 * corpus, writes SVG drawings of the first match and exports three concordance
 * files, printing progress on standard output.
 */
public class DrawTSSVG {
10

  
11
	// All paths are resolved below the current user's home directory.
	String userhome = System.getProperty("user.home");
12
	File configdir = new File(userhome,"TXM/corpora/graal/tiger/tigersearch.logprop");
13
	File registrydir = new File(userhome, "TXM/corpora/graal/tiger");
14
	// NOTE(review): svgfile is not read anywhere in this class.
	File svgfile = new File(registrydir, "result.svg");
15
	// Corpus identifier handed to the manager, and the query to run.
	String id = "GRAAL";
16
	String query = "#n:[cat = \"Obj\"] >* #m & arity(#n, 2, 10)";
17
	//String query = "[]";
18

  
19
	/**
	 * Runs the whole scenario. Nothing happens when the corpus manager reports
	 * that it is not initialized.
	 *
	 * @throws Exception whatever the engine calls throw; nothing is caught here
	 */
	public void test() throws Exception {
20
		TSCorpusManager manager = new TSCorpusManager(registrydir, configdir);
21

  
22
		if (manager.isInitialized()) {
23

  
24
			ExecTimer.start();
25
			TSCorpus corpus = manager.getCorpus(id);
26

  
27
			System.out.println("T features: "+corpus.getTFeatures());
28
			System.out.println("NT features: "+corpus.getNTFeatures());
29

  
30
			TSResult result = corpus.query(query);
31

  
32
			result.setDisplayProperties(Arrays.asList("word", "pos", "form"), "type");
33

  
34
			// Draw the first three sub graphs of the first match, one SVG file each.
			TSMatch first = result.getFirst();
35
			System.out.println("First: nb of sub graph: "+first.getNumberOfSubGraph());
36
			first.firstSubGraph();
37
			first.toSVGFile(new File(registrydir,"result_1_1.svg"));
38
			System.out.println(first.toHTML() + "<br>");
39
			first.nextSubGraph();
40
			first.toSVGFile(new File(registrydir,"result_1_2.svg"));
41
			System.out.println(first.toHTML() + "<br>");
42
			first.nextSubGraph();
43
			first.toSVGFile(new File(registrydir,"result_1_3.svg"));
44
			System.out.println(first.toHTML() + "<br>");
45

  
46
//			TSMatch next = result.getNext();
47
//			System.out.println( "Next: nb of sub graph: "+next.getNumberOfSubGraph());
48
//			next.firstSubGraph();
49
//			next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_1.svg"));
50
//			System.out.println( next.toHTML() + "<br>");
51
//			next.nextSubGraph();
52
//			next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_2.svg"));
53
//			System.out.println( next.toHTML() + "<br>");
54
//			next.nextSubGraph();
55
//			next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_3.svg"));
56
//			System.out.println( next.toHTML() + "<br>");
57
//			next.previousSubGraph();
58
//			next.toSVGFile(new File(userhome,"TXM/Tiger/result_2_2bis.svg"));
59
//			System.out.println( next.toHTML() + "<br>");
60

  
61
			//	System.out.println( "make some room :)"
62
			//	for(File f : new File("/home/mdecorde/Bureau/tigerexports/").listFiles())
63
			//		f.delete()
64
			//
65

  
66
			//	System.out.println( "SAVE XML"
67
			//	result.toXml(new File("/home/mdecorde/Bureau/tigerexports/result.xml"), false, true)
68
			//
69
			//	System.out.println( "SAVE AS SVG"
70
			//	for (int i = 0 ; i < result.getNumberOfMatch() && i < 10; i++) {
71
			//		result.getMatch(i).toSVGFile(new File("/home/mdecorde/Bureau/tigerexports/match_"+i+".svg"))
72
			//	}
73
			//
74
				// Export the same result with the three concordance styles (last argument false in each call).
				System.out.println("SIMPLE NO PNC");
75
				System.out.println(result.toConcordance(new File(registrydir, "export1.csv"), "concordance_simple", 30, Arrays.asList("cat"), Arrays.asList("pos"), false));
76
				System.out.println("MOT-PIVOT NO PNC");
77
				System.out.println(result.toConcordance(new File(registrydir, "export2.csv"), "concordance_mot-pivot", 30, Arrays.asList("cat"), Arrays.asList("pos"), false));
78
				System.out.println("BLOCKS NO PNC");
79
				System.out.println(result.toConcordance(new File(registrydir, "export3.csv"), "concordance_blocks", 30, Arrays.asList("cat"), Arrays.asList("pos"), false));
80
			
81
			//	System.out.println( "SIMPLE + PNC"
82
			//	System.out.println( result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export12.csv"), "concordance_simple", 30, ["cat"], ["pos"], true);
83
			//	System.out.println( "MOT-PIVOT + PNC"
84
			//	System.out.println( result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export22.csv"), "concordance_mot-pivot", 30, ["cat"], ["pos"], true);
85
			//	System.out.println( "BLOCKS + PNC"
86
			//	System.out.println( result.toConcordance(new File("/home/mdecorde/Bureau/tigerexports/export32.csv"), "concordance_blocks", 30, ["cat"], ["pos"], true);
87
			System.out.println(ExecTimer.stop());
88
		}
89
	}
90
	
91
	/**
	 * Command-line entry point: runs {@link #test()} on a fresh instance and
	 * prints the stack trace of any exception instead of propagating it.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
92
		DrawTSSVG d = new DrawTSSVG();
93
		try {
94
			d.test();
95
		} catch (Exception e) {
96
			// TODO Auto-generated catch block
97
			e.printStackTrace();
98
		}
99
	}
100
}
tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/commands/ComputeTSIndex.java (revision 713)
38 38
import org.txm.rcp.editors.TXMResultEditorInput;
39 39
import org.txm.searchengine.cqp.corpus.Corpus;
40 40
import org.txm.searchengine.cqp.corpus.Partition;
41
import org.txm.test.DrawTSSVG;
41 42
import org.txm.tigersearch.editors.TIGERSearchEditor;
42 43
import org.txm.tigersearch.editors.TIGERSearchEditorInput;
43 44
import org.txm.tigersearch.editors.TSIndexEditor;
......
59 60
	@Override
60 61
	public Object execute(final ExecutionEvent event) throws ExecutionException {
61 62

  
62
		IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event);
63

  
64
		Object s = selection.getFirstElement();
65
		if (s instanceof Corpus) {
66
			Corpus corpus = (Corpus)s;
67
			openEditor(corpus);
68
		} else if (s instanceof Partition) {
69
			Partition partition = (Partition)s;
70
			openEditor(partition);
63
//		IStructuredSelection selection = (IStructuredSelection) HandlerUtil.getCurrentSelection(event);
64
//
65
//		Object s = selection.getFirstElement();
66
//		if (s instanceof Corpus) {
67
//			Corpus corpus = (Corpus)s;
68
//			openEditor(corpus);
69
//		} else if (s instanceof Partition) {
70
//			Partition partition = (Partition)s;
71
//			openEditor(partition);
72
//		}
73
		
74
		DrawTSSVG d = new DrawTSSVG();
75
		try {
76
			d.test();
77
		} catch (Exception e) {
78
			// TODO Auto-generated catch block
79
			e.printStackTrace();
71 80
		}
72 81
		return null;
73 82
	}
tmp/org.txm.tigersearch.rcp/src/org/txm/tigersearch/editors/TIGERSearchEditor.java (revision 713)
10 10
import org.eclipse.swt.custom.StyledText;
11 11
import org.eclipse.swt.events.SelectionEvent;
12 12
import org.eclipse.swt.events.SelectionListener;
13
import org.eclipse.swt.graphics.Image;
14
import org.eclipse.swt.layout.FormAttachment;
15
import org.eclipse.swt.layout.FormData;
16
import org.eclipse.swt.layout.FormLayout;
17 13
import org.eclipse.swt.layout.GridData;
18 14
import org.eclipse.swt.layout.GridLayout;
19 15
import org.eclipse.swt.widgets.Button;
......
26 22
import org.eclipse.ui.IEditorInput;
27 23
import org.eclipse.ui.IEditorSite;
28 24
import org.eclipse.ui.PartInitException;
29
import org.eclipse.ui.part.EditorPart;
30
import org.txm.Toolbox;
31 25
import org.txm.core.preferences.TBXPreferences;
32 26
import org.txm.core.preferences.TXMPreferences;
33
import org.txm.core.results.TXMResult;
34 27
import org.txm.function.tigersearch.TIGERSearch;
35
import org.txm.rcp.IImageKeys;
36 28
import org.txm.rcp.JobsTimer;
37 29
import org.txm.rcp.editors.TXMEditor;
30
import org.txm.rcp.editors.TXMResultEditorInput;
38 31
import org.txm.rcp.svg.SVGComposite;
39 32
import org.txm.rcp.utils.JobHandler;
40 33
import org.txm.rcp.views.QueriesView;
......
73 66

  
74 67
	@Override
75 68
	public void _createPartControl(Composite parent) {
76
		
77
		Composite mainPanel = new Composite(parent, SWT.NONE);
78
		mainPanel.setLayout(new FormLayout());
79 69

  
80 70
		// System.out.println(parent.getLayout());
81
		Composite queryPanel = new Composite(mainPanel, SWT.NONE);
82
		Composite paramPanel = new Composite(mainPanel, SWT.NONE);
83
		svgPanel = new SVGComposite(mainPanel, SWT.EMBEDDED | SWT.NO_BACKGROUND);
84

  
85
		FormData qdata = new FormData();
86
		qdata.top = new FormAttachment(0);
87
		qdata.left = new FormAttachment(0);
88
		qdata.right = new FormAttachment(100);
89
		qdata.bottom = new FormAttachment(30);
90
		queryPanel.setLayoutData(qdata);
91

  
92
		FormData fdata = new FormData();
93
		fdata.top = new FormAttachment(queryPanel);
94
		fdata.left = new FormAttachment(0);
95
		fdata.right = new FormAttachment(100);
96
		paramPanel.setLayoutData(fdata);
97

  
98
		fdata = new FormData();
99
		fdata.top = new FormAttachment(paramPanel);
100
		fdata.left = new FormAttachment(0);
101
		fdata.right = new FormAttachment(100);
102
		fdata.bottom = new FormAttachment(100);
103
		svgPanel.setLayoutData(fdata);
104

  
71
		Composite queryPanel = this.getCommandParametersGroup();
72
	
105 73
		// fill query Area
106
		GridLayout qlayout = new GridLayout(1, true);
74
		GridLayout qlayout = new GridLayout(11, false);
107 75
		queryPanel.setLayout(qlayout);
108 76

  
109 77
		queryArea = new StyledText(queryPanel, SWT.BORDER | SWT.V_SCROLL | SWT.H_SCROLL);
110 78
		GridData queryAreaLayoutData = new GridData(GridData.FILL, GridData.FILL, true, true);
79
		queryAreaLayoutData.horizontalSpan = 11;
80
		queryAreaLayoutData.heightHint = 80;
81
		queryAreaLayoutData.minimumHeight = 80;
111 82
		queryArea.setLayoutData(queryAreaLayoutData);
112 83

  
113 84
		// fill param Area
114
		GridLayout layout = new GridLayout(11, false);
115
		paramPanel.setLayout(layout);
116 85

  
117 86
		GridData gdata = new GridData(SWT.FILL, SWT.CENTER, true, true);
118
		new Label(paramPanel, SWT.NONE).setText("Sent ");
119
		sentSpinner = new Spinner(paramPanel, SWT.BORDER);
87
		new Label(queryPanel, SWT.NONE).setText("Sent ");
88
		sentSpinner = new Spinner(queryPanel, SWT.BORDER);
120 89
		sentSpinner.setMinimum(1);
121 90
		sentSpinner.setIncrement(1);
122 91
		sentSpinner.setMaximum(10000000);
......
135 104
		};
136 105
		sentSpinner.addSelectionListener(selChangedListener);
137 106

  
138
		sentCounterLabel = new Label(paramPanel, SWT.NONE);
107
		sentCounterLabel = new Label(queryPanel, SWT.NONE);
139 108
		gdata = new GridData(SWT.FILL, SWT.CENTER, true, true);
140 109
		sentCounterLabel.setLayoutData(gdata);
141 110

  
142
		new Label(paramPanel, SWT.NONE).setText("Sub ");
143
		subSpinner = new Spinner(paramPanel, SWT.BORDER);
111
		new Label(queryPanel, SWT.NONE).setText("Sub ");
112
		subSpinner = new Spinner(queryPanel, SWT.BORDER);
144 113
		subSpinner.setMinimum(1);
145 114
		subSpinner.setIncrement(1);
146 115
		subSpinner.setMaximum(100000000);
......
149 118
		subSpinner.setLayoutData(gdata);
150 119
		subSpinner.addSelectionListener(selChangedListener);
151 120

  
152
		subCounterLabel = new Label(paramPanel, SWT.NONE);
121
		subCounterLabel = new Label(queryPanel, SWT.NONE);
153 122
		gdata = new GridData(SWT.FILL, SWT.CENTER, true, true);
154 123
		subCounterLabel.setLayoutData(gdata);
155 124

  
156
		new Label(paramPanel, SWT.NONE).setText("T ");
157
		TCombo = new Combo(paramPanel, SWT.READ_ONLY);
125
		new Label(queryPanel, SWT.NONE).setText("T ");
126
		TCombo = new Combo(queryPanel, SWT.READ_ONLY);
158 127
		gdata = new GridData(SWT.FILL, SWT.CENTER, true, true);
159 128
		TCombo.setLayoutData(gdata);
160 129
		TCombo.addSelectionListener(selChangedListener);
161 130

  
162
		new Label(paramPanel, SWT.NONE).setText("NT ");
163
		NTCombo = new Combo(paramPanel, SWT.READ_ONLY);
131
		new Label(queryPanel, SWT.NONE).setText("NT ");
132
		NTCombo = new Combo(queryPanel, SWT.READ_ONLY);
164 133
		gdata = new GridData(SWT.FILL, SWT.CENTER, true, true);
165 134
		NTCombo.setLayoutData(gdata);
166 135
		NTCombo.addSelectionListener(selChangedListener);
167 136

  
168
		okButton = new Button(paramPanel, SWT.PUSH);
137
		okButton = new Button(queryPanel, SWT.PUSH);
169 138
		okButton.setText("Search");
170 139
		okButton.addSelectionListener(new SelectionListener() {
171 140
			@Override
......
186 155
			}
187 156
		});
188 157

  
158
		Composite mainPanel = this.getResultArea();
159
		mainPanel.setLayout(new GridLayout(1, false));
160
		svgPanel = new SVGComposite(mainPanel, SWT.EMBEDDED | SWT.NO_BACKGROUND);
161
		svgPanel.setLayoutData(new GridData(SWT.FILL, SWT.FILL, true, true));
162
		
189 163
		initializeFields();
190 164
	}
191 165

  
......
370 344

  
371 345
	@Override
372 346
	public void setFocus() {
373
		queryArea.forceFocus();
347
		if (queryArea != null)
348
			queryArea.forceFocus();
374 349
	}
375 350

  
376 351
	@Override
......
389 364
		} //$NON-NLS-1$ //$NON-NLS-2$
390 365
		Log.severe(svgFile.toString());
391 366
		
392
		TIGERSearchEditorInput ii = (TIGERSearchEditorInput) input;
393
		source = ii.getSource();
394
		ts = ii.getTIGERSearch();
395
		if (source instanceof Corpus) {
396
			corpus = ((Corpus)source);
397
		}
367
		TXMResultEditorInput<TIGERSearch> ii = (TXMResultEditorInput<TIGERSearch>) input;
368
		ts = ii.getResult();
369
		corpus = ts.getCorpus();
398 370
	}
399 371

  
400 372
	@Override
tmp/org.txm.tigersearch.rcp/src/org/txm/export/ts/MatchInject.groovy (revision 713)
1
#! /usr/bin/groovy
2
package org.txm.export.ts;
3

  
4
import org.xml.sax.XMLReader;
5
import org.xml.sax.helpers.XMLReaderFactory;
6
import groovy.util.slurpersupport.NodeChild
7

  
8
/*
9
 * The script takes the <matches/> elements from file "Tiger_match.xml"
10
 * and inserts them at the end of the <s/> element bearing the same ID.
11
 * Inputs: three file names 
12
 * - TsInputName --- the TS file WITHOUT matches
13
 * - MatchInputName --- the TS file containing only matches.
14
 * - OutputFileName ---required output file.
15
 * To pass these arguments from within an application, call script() directly.
16
 */
17

  
18
// Filename variables
19
// def TsInputName = '/home/tomr/Documents/Work/lyon12/srcmf/groovy/MatchInject/ts_input.xml'
20
// def MatchInputName = '/home/tomr/Documents/Work/lyon12/srcmf/groovy/MatchInject/match_input.xml'
21
// def OutputFileName = '/home/tomr/Documents/Work/lyon12/srcmf/groovy/MatchInject/test.xml'
22

  
23
// Main code: checks for correct number of arguments if run from cmd line.
24
// `args` holds the command-line arguments of the script. The truthiness check
// short-circuits when it is null or empty, so size() is only compared on a real list.
if (args && args.size() == 3) {
25
    script(args[0], args[1], args[2])
26
} else {
27
    // Any other argument count only prints the usage text; no file is read or written.
    println '''Incorrect number of arguments: three strings required.
28

  
29
USAGE:
30
******
31
groovy MatchInject.groovy TsInput.xml MatchInput.xml OutputFile.xml'''}
32

  
33
/**
 * Convenience overload: wraps the three file names in File objects and
 * delegates to script(File, File, File).
 *
 * @param tsInputName    path of the TS file without matches
 * @param matchInputName path of the TS file containing only matches
 * @param outputFileName path of the file to write
 */
def script(String tsInputName, String matchInputName, String outputFileName) {
34
	// Each name becomes its own File argument. The closing parenthesis used to be
	// misplaced, nesting the output File inside the match File constructor call.
	script(new File(tsInputName), new File(matchInputName), new File(outputFileName));
35
}
36

  
37
// The script.
38
/**
 * Writes a new corpus document in which every <s> of tsInputFile is copied
 * with its graph, followed by the <matches> of the sentence bearing the same
 * id in matchInputFile (when there is one).
 *
 * Side effect: sets the JVM-wide system property "org.xml.sax.driver".
 *
 * @param tsInputFile    the TS file without matches
 * @param matchInputFile the TS file containing only matches (parsed with namespaces disabled)
 * @param outputFile     the file to write
 */
def script(File tsInputFile, File matchInputFile, File outputFile) {
39
	//println "loading TsInput..."
40
    def TsInput = new XmlSlurper().parse(tsInputFile)
41
	//println "loading matchInputFile..."
42
	System.setProperty("org.xml.sax.driver", "com.sun.org.apache.xerces.internal.parsers.SAXParser");
43
	def xmlReader = XMLReaderFactory.createXMLReader();
44
	xmlReader.setFeature('http://xml.org/sax/features/namespaces', false)
45
    XmlSlurper mslurper = new XmlSlurper(xmlReader);
46
	def MatchInput = mslurper.parse(matchInputFile)
47
	//println "building OutputFile... size="+matchInputFile.length()
48
	
49
	def inputSentences = TsInput.'**'.findAll {it.name() == 's'};
50
	def matcheSentences = MatchInput.'**'.findAll { it.name() == 's' }
51
//	println "MATCHES"
52
//	for(NodeChild match : matcheSentences) {
53
//		//println match.getClass()
54
//		match.namespacePrefix = ""
55
//		match.namespaceMap = [:]
56
//		//println match
57
//	}
58
	int count = 0;
	// Index the match sentences by id once, instead of scanning the whole list for
	// every input sentence. The first sentence seen for an id wins, which is what
	// the former find{} call returned.
	def matchesById = [:]
	matcheSentences.each { matchSentence ->
		String sentenceId = matchSentence.'@id'.toString()
		if (!matchesById.containsKey(sentenceId)) {
			matchesById[sentenceId] = matchSentence
		}
	}
59
	//println("nb of input sentences: "+inputSentences.size());
60
	
61
    def markup = {
62
		mkp.xmlDeclaration()
63
		//mkp.declareNamespace("svg":"xmlns=\"http://www.w3.org/2000/svg\"")
64
		
65
        corpus(id:'TSOut') {
66
            body {
67
                inputSentences.each { sPath ->
68
					//if (count++%200 == 0) println((100*count/inputSentences.size()))
69
                    s(id:"${sPath.'@id'}") { 
70
                        mkp.yield(sPath.graph)
71
                        def sMatches = matchesById.get(
72
                            sPath.'@id'.toString()
73
                        )
74
						
75
                        if (sMatches) {
76
                            mkp.yield(sMatches.matches)
77
                        }
78
						//matcheSentences.removeAll(sMatches)
79
                    }
80
                }
81
            }
82
        }
83
    }
84
    def processor = new groovy.xml.StreamingMarkupBuilder().bind(markup)
85
    outputFile.withWriter { it << groovy.xml.XmlUtil.serialize(processor)}
86
}
tmp/org.txm.tigersearch.rcp/src/org/txm/export/ts/package.html (revision 713)
1
<html>
2
<body>
3
<p>Contains scripts to build the TIGERSearch concordances</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.tigersearch.rcp/src/org/txm/export/ts/ConcordanceBlocks.groovy (revision 713)
1
package org.txm.export.ts;
2

  
3
import javax.xml.parsers.DocumentBuilderFactory
4

  
5
class ConcordanceBlocks {
6
	def codec = 'UTF-8'
7

  
8
	int cx = 30;
9
	def ntTypes = [];
10
	def tTypes = [];
11
	File xmlfile, outfile;
12

  
13
	/**
	 * Builds the "blocks" concordance of a TIGERSearch XML export and writes it
	 * to outfile as tab-separated text: one header line, then one line per
	 * <match> element.
	 *
	 * Columns are the sentence id, the left context outside and inside the
	 * sentence, the blocks before the pivot (with their type columns), the
	 * pivot, the blocks after the pivot, the right context inside and outside
	 * the sentence, and a 'Warnings' column. Missing values are written as "--".
	 *
	 * @param xmlfile the XML file to read (parsed XInclude-aware)
	 * @param outfile the file to write, encoded with the codec field
	 * @param cx context size in words used to size the out-of-sentence context
	 * @param ntTypes attribute names read on non-terminal nodes
	 * @param tTypes attribute names read on terminal nodes
	 * @return always true when no exception is thrown
	 */
	public boolean process(File xmlfile, File outfile, int cx, def ntTypes, def tTypes) {
14
		this.cx = cx;
15
		this.xmlfile = xmlfile;
16
		this.outfile = outfile;
17
		this.ntTypes = ntTypes;
18
		this.tTypes = tTypes;
19

  
20
		Writer writer = outfile.newPrintWriter(codec)
		// Everything below may throw (XML parsing, row building): the writer is
		// closed in the finally clause so the output file handle is never leaked.
		try {
21

  
22
			println 'Reading XML File'
23
			def factory = DocumentBuilderFactory.newInstance()
24
			factory.setXIncludeAware(true)
25
			def builder = factory.newDocumentBuilder()
26
			def records = builder.parse(xmlfile).documentElement
27
			println 'done.'
28

  
29
			println 'done.  Pre-treating file to combine results with the same pivot...'
30
			records = matchCombine(records)
31
			println 'done.'
32

  
33
			println 'Calculating max. number of blocks...'
34
			def nBlock = 0
35

  
36
			def allMatches = toList(records.getElementsByTagName('match'))
37

  
38
			// nBlock = largest number of '#block*' variables found in a single match
			for (def match : allMatches) {
39
				nBlock = [
40
					nBlock,
41
					toList(match.getElementsByTagName('variable')).findAll{
42
						it.getAttribute('name').startsWith('#block')
43
					}.size()
44
				].max()
45
			}
46

  
47
			println "done ( $nBlock )"
48

  
49
			// Table headers
50
			println 'Writing concordance...'
51

  
52
			def header = ['sId', 'LeftCxOutsideSnt', 'LeftCxInsideSnt']
53

  
54
			// Blocks before the pivot are listed from the farthest (nBlock) to the nearest (1)
			for (int i = nBlock ; i > 0 ; i--) {
55

  
56
				header.add("${i}BlockBeforePivot")
57

  
58
				for (int j = 0 ; j < [ntTypes.size(), tTypes.size()].max() ; j++) {
59

  
60
					header.add("${i}BlockBeforePivotType${j+1}")
61

  
62
				}
63
			}
64

  
65
			header.add('Pivot')
66

  
67
			for (int j = 0 ; j < [ntTypes.size(), tTypes.size()].max() ; j++) {
68

  
69
				header.add("PivotType${j+1}")
70

  
71
			}
72

  
73
			for (int i = 1 ; i <= nBlock ; i++) {
74

  
75
				header.add("${i}BlockAfterPivot")
76

  
77
				for (int j = 0 ; j < [ntTypes.size(), tTypes.size()].max() ; j++) {
78

  
79
					header.add("${i}BlockAfterPivotType${j+1}")
80

  
81
				}
82
			}
83

  
84
			header.addAll(['RightCxInsideSnt', 'RightCxOutsideSnt', 'Warnings'])
85

  
86
			writer.write(header.join("\t")+"\n");
87

  
88
			def allTerminals = toList(records.getElementsByTagName('t'))
89
			int tenPercentile = 0
90

  
91
			int i = 0;
92
			for (def aMatchNode : allMatches) {
93
				// Progress message each time another tenth of the matches is reached
				if ((int)(((float)++i / allMatches.size()) * 10) > tenPercentile)
94
					println ""+(++tenPercentile * 10)+ ' percent complete...'
95

  
96
				def (rowDict, inSntCxLengthLeft, inSntCxLengthRight) = match2CSVrow(aMatchNode, header);
97
				
98
				// Add out-of-sentence context
99
				def sNode = aMatchNode.parentNode.parentNode; // sentence
100
				def terminals = toList(sNode.getElementsByTagName('t'));
101
				def firstTInS = terminals[0] // get first node of the sentence
102
				def lastTInS = terminals[-1] // get last node of the sentence
103
				def firstTInSIx = allTerminals.indexOf(firstTInS) // get its position in the text
104
				def lastTInSIx = allTerminals.indexOf(lastTInS) // get its position in the text
105

  
106
				// Left context
107
				def lexs = []
108
				int start = Math.max(firstTInSIx - cx + inSntCxLengthLeft, 0);
109
				int end = firstTInSIx;
110
				if(start < end)
111
				for (def tNode : allTerminals.subList(start, end)){
112
					lexs.add(tNode.getAttribute('word'))
113
					// a '/' marks the end of a sentence inside the context
					if (toList(tNode.parentNode.getElementsByTagName('t'))[-1] == tNode)
114
						lexs.add('/')
115
				}
116
				rowDict['LeftCxOutsideSnt'] = lexs.join(" ")
117

  
118
				// Right context
119
				lexs = []
120

  
121
				start = lastTInSIx + 1;
122
				end = Math.min(allTerminals.size(), lastTInSIx + cx - inSntCxLengthRight)
123

  
124
				if(start < end) {
125
					for (def tNode : allTerminals.subList(start, end)){
126
						lexs.add(tNode.getAttribute('word'))
127
						if (toList(tNode.parentNode.getElementsByTagName('t'))[-1] == tNode)
128
							lexs.add('/')
129
					}}
130
				rowDict["RightCxOutsideSnt"] = lexs.join(" ")
131

  
132
				// Write rowDict to CSV
133

  
134
				//			CSVWriter.writerow(dict(zip([k for k in rowDict.iterkeys()], \
135
				//        [ v.encode('utf-8') for v in rowDict.itervalues() ] )))
136

  
137
				// One tab-separated cell per header column, "--" when the row has no value for it
				String line = "";
138
				for (int ii = 0 ; ii < header.size() ; ii++){
139
					String h = header.get(ii)
140
					String val = rowDict.getAt(h);
141
					if (val == null)
142
						line += "--"
143
					else
144
						line += val
145
					if (ii < header.size() -1)
146
						line +="\t"
147
				}
148
				writer.write(line+"\n")
149
				writer.flush()
150

  
151
			}
		} finally {
152
			writer.close();
		}
153
		return true;
154
	}
155

  
156
	/**
	 * Builds the concordance row of one <match> element.
	 *
	 * The '#pivot' and '#block*' variables of the match are resolved to nodes
	 * of the enclosing sentence, ordered by the position of their first
	 * terminal, and laid out around the pivot. Words lying between two
	 * consecutive blocks are added in braces; '{?}' is used when the gap
	 * cannot be determined.
	 *
	 * All row keys are plain Strings: the caller looks columns up with String
	 * header names, and a GString key would never match such a lookup.
	 *
	 * @param aMatchNode the <match> DOM element
	 * @param header the column names (not read by this method)
	 * @return a list [rowDict, inSntCxLengthLeft, inSntCxLengthRight]
	 */
	private def match2CSVrow(def aMatchNode, def header) {
157
		// Step 1: Build a LIST of DICTIONARIES to describe the variables:
158
		// dict(name='varname_minus_the_hash'
159
		//      parent='nt_nodes'
160
		//      terminals='t_nodes in a list')
161
		
162
		def sNode = aMatchNode.parentNode.parentNode // get the sentence node
163

  
164
		def tNodesInSentence = toList(sNode.getElementsByTagName('t')) // get all terminal nodes of the sentence
165

  
166
		def varDetails = []
167

  
168
		for ( def aVariable : aMatchNode.getElementsByTagName('variable')){
169
			if ( aVariable.getAttribute('name') == '#pivot' ||
170
			aVariable.getAttribute('name').startsWith('#block')) {
171
				def varParent = idKey(sNode, aVariable.getAttribute('idref'))
172

  
173
				varDetails.add([
174
							'name' : aVariable.getAttribute('name').substring(1),
175
							'parent': varParent,
176
							'terminals': toList(getTNodes(varParent)),
177
							'lexform': writeLexForm(varParent),
178
						])
179

  
180
				// Add types
181

  
182
				for (int j = 0 ; j < [ntTypes.size(), tTypes.size()].max() ; j++) {
183

  
184
					def a = ''
185

  
186
					// The first NT (resp. T) attribute is used as the test; the j-th one is then read
					if (j < ntTypes.size() && varParent.getAttribute(ntTypes[0])
187
					) {
188
						a = varParent.getAttribute(ntTypes[j])
189
					} else if (j < tTypes.size() && varParent.getAttribute(tTypes[0])
190
					) {
191
						a = varParent.getAttribute(tTypes[j])
192
					}
193

  
194
					varDetails[-1]["type${j+1}"] = a
195

  
196
				}
197

  
198
				// write min idx in sentence of terminal nodes used
199
				varDetails[-1]['terminalsIx'] = varDetails[-1]['terminals'].collect{tNodesInSentence.indexOf(it)}
200
			}
201
		}
202

  
203
		// Sort varDetails by the start ID of the word
204

  
205
		varDetails =  varDetails.sort{it['terminalsIx'].min()}
206

  
207
		// NOTE(review): findIndexOf returns -1 when no variable is named 'pivot'; the
		// negative index below would then address varDetails from its end - confirm
		// that every match is guaranteed to carry a #pivot variable.
		def pivotBlockPosition = 0;
208
		pivotBlockPosition = varDetails.findIndexOf{it['name'] == 'pivot'}
209

  
210
		// Write the table
211
		def rowDict = [:]
212
		rowDict['sId'] = sNode.getAttribute('id')
213

  
214
		// write the pivot
215
		rowDict['Pivot'] = varDetails[pivotBlockPosition]['lexform']
216

  
217
		for (int j = 0 ; j < [ntTypes.size(), tTypes.size()].max() ; j++) {
218

  
219
			rowDict["PivotType${j+1}".toString()] = varDetails[pivotBlockPosition]["type${j+1}"]
220
		}
221

  
222
		def startNextId = (varDetails[pivotBlockPosition]['terminalsIx']).min()
223
		def endPrevId = (varDetails[pivotBlockPosition]['terminalsIx']).max()
224

  
225
		// Write the Pre-pivot blocks
226

  
227
		def i = 0
228

  
229
		//println "rowDict: $rowDict"
230
		if ( pivotBlockPosition > 0)	{
231
			for ( int j = pivotBlockPosition -1 ; j >= 0 ; j--) {
232
				
233
				i++;
234
				def block = varDetails.get(j)
235
				//println "process block: $block"
236

  
				// String-typed local: coerces the GString so the map key is a plain String
				String beforeKey = "${i}BlockBeforePivot"
237
				rowDict[beforeKey] = block['lexform']
238

  
239
				for ( int k = 0 ; k < [ntTypes.size(), tTypes.size()].max() ; k++ ) {
240
					rowDict["${i}BlockBeforePivotType${k+1}".toString()] = block["type${k+1}"]
241
				}
242

  
243
				// Add any intervening words to the right edge.
244

  
245
				def rightEdge = (block['terminalsIx'].findAll{ it < startNextId }).max();
246
				//println ""+block['terminalsIx']+"   rightEdge: $rightEdge"
247

  
				if (rightEdge == null) {
					// No terminal of this block lies before the next element: the gap is
					// unknown. Mark it the same way the post-pivot branch does instead of
					// failing on null + 1.
					rowDict[beforeKey] += ' {?}'
				} else {
248
					for (int ix = rightEdge + 1 ; ix < startNextId ; ix++) // add word to reach the pivot
249
					{
250
						rowDict[beforeKey] += ' {' + tNodesInSentence[ix].getAttribute('word') + '}';
251
					}
				}
252

  
253
				startNextId = (block['terminalsIx']).min()
254
			}
255
		}
256
		
257
		// Write the in-sentence left context
258

  
259
		def lexs = tNodesInSentence.subList(0, startNextId).collect {it.getAttribute('word')}
260
		rowDict['LeftCxInsideSnt'] = lexs.join(" ")
261

  
262
		def inSntCxLengthLeft = startNextId
263

  
264
		// Write the Post-pivot blocks
265

  
266
		i = 0
267
		for (def block in varDetails.subList(pivotBlockPosition + 1, varDetails.size())){
268
			i++
			String afterKey = "${i}BlockAfterPivot"
269
			rowDict[afterKey] = block['lexform']
270

  
271
			for (int j = 0 ; j < [ntTypes.size(), tTypes.size()].max() ; j++) {
272
				rowDict["${i}BlockAfterPivotType${j+1}".toString()] = block["type${j+1}"]
273
			}
274

  
275
			// Add any intervening words to the left edge.
276
			// Note that this isn't always possible, in particular
277
			// if the preceding element is discontinuous.
278

  
279
			def leftEdgeList = block['terminalsIx'].findAll{it > endPrevId }
280

  
281
			if ( leftEdgeList.size() > 0) {
282
				def leftEdge = leftEdgeList.min()
283
				for (int ix = leftEdge - 1 ; ix > endPrevId ; ix--) {
284
					rowDict[afterKey] = '{' + tNodesInSentence[ix].getAttribute('word') + '} ' + rowDict[afterKey]
285
				}
286
			}
287
			else {
288
				rowDict[afterKey] = '{?} ' + rowDict[afterKey]
289
			}
290
			endPrevId = block['terminalsIx'].max()
291
		}
292

  
293
		// Write the in-sentence right-context
294

  
295
		lexs = tNodesInSentence.subList(endPrevId + 1, tNodesInSentence.size()).collect {it.getAttribute('word')}
296
		rowDict['RightCxInsideSnt'] = lexs.join(" ")
297

  
298
		def inSntCxLengthRight = tNodesInSentence.size() - endPrevId
299

  
300
		return [rowDict, inSntCxLengthLeft, inSntCxLengthRight]
302
	}
303

  
304
	/**
	 * Copies the elements of an iterable object into a new collection.
	 *
	 * In this class it is used to take a snapshot of the NodeList objects returned by
	 * getElementsByTagName, so that the result can be traversed while the DOM is modified.
	 *
	 * @param iterable the object to copy the elements from
	 * @return the result of findAll with a filter that accepts every element
	 */
	public def toList(def iterable) {
		def acceptEverything = { element -> true }
		return iterable.findAll(acceptEverything)
	}
307

  
308
	/**
309
	 * Returns the first descendant element of sNode whose "id" attribute equals anId, or null when there is none.
310
	 */
311
	public def idKey(def sNode, String anId) {
		// Look through every descendant element of sNode and stop at the first one
		// whose 'id' attribute is anId; find yields null when nothing matches.
		def descendants = sNode.getElementsByTagName("*")
		return descendants.find { candidate -> candidate.getAttribute("id") == anId }
	}
314

  
315
	/**
316
	 * return a list of all terminal nodes of the node.
317
	 * if the node is a non-terminal, iterate over children and so on
318
	 */
319
	def getTNodes(theNode) {
		// Work list of nodes still to be examined, seeded with the start node.
		def pending = [theNode]
		def leaves = []

		while (!pending.isEmpty()) {
			// NOTE(review): which end of the list pop() takes from depends on the Groovy
			// version, so the order of the returned terminals should not be relied upon.
			def current = pending.pop()
			def outgoing = toList(current.getElementsByTagName('edge'))

			if (outgoing.isEmpty()) {
				// A node without any <edge> descendant is a terminal.
				leaves.add(current)
				continue
			}

			// Otherwise follow each edge: its 'idref' is resolved with idKey under the
			// element two levels above the start node.
			outgoing.each { edge ->
				pending.add(idKey(theNode.parentNode.parentNode, edge.getAttribute('idref')))
			}
		}
		return leaves
	}
334

  
335
	/**
336
	 * 
337
	 * @param theNode
338
	 * @return the space-separated word forms of the terminal nodes reached from theNode; sentence words lying between two of them are included in square brackets
339
	 */
340
	def writeLexForm(theNode) {
		// The element two levels above theNode holds all the <t> nodes of the sentence.
		def sentence = theNode.parentNode.parentNode
		def sentenceTerminals = sentence.getElementsByTagName('t')
		// Terminals that belong to theNode (found by following its edges).
		def ownTerminals = getTNodes(theNode)

		def words = []
		// Foreign words met since the last own terminal; they are only kept if
		// another own terminal follows, i.e. if they sit inside a discontinuity.
		def gapWords = []
		def started = false

		for (def terminal : sentenceTerminals) {
			if (ownTerminals.contains(terminal)) {
				started = true
				words.addAll(gapWords)
				words.add(terminal.getAttribute('word'))
				gapWords = []
			}
			else if (started) {
				gapWords.add('[' + terminal.getAttribute('word') + ']')
			}
		}
		return words.join(" ")
	}
359

  
360
	/**
	 * Merges, inside every 'matches' element of theDOM, the matches that share a pivot.
	 *
	 * For each '#pivot' variable whose 'idref' was already seen on an earlier pivot, all
	 * the 'variable' elements found under the later pivot's parent are moved under the
	 * parent of the first pivot, and the variables of that parent are then de-duplicated.
	 * Finally every 'match' element left without any 'variable' is removed.
	 *
	 * @param theDOM the DOM node to process; it is modified in place
	 * @return theDOM
	 */
	def matchCombine(theDOM) {

		// Two variables are duplicates when they point to the same node and either
		// have the same name or are both '#block...' variables.
		def isDuplicate = { kept, candidate ->
			kept.getAttribute('idref') == candidate.getAttribute('idref') &&
					(kept.getAttribute('name') == candidate.getAttribute('name') ||
					(kept.getAttribute('name').startsWith('#block') &&
					candidate.getAttribute('name').startsWith('#block')))
		}

		for (def matchesElement : theDOM.getElementsByTagName('matches')) {

			// Snapshot of the pivot variables, taken before anything is moved.
			def allVariables = toList(matchesElement.getElementsByTagName('variable'))
			def pivots = allVariables.findAll { it.getAttribute('name') == '#pivot' }
			def seenPivots = []

			for (def pivot : pivots) {

				def firstSeen = seenPivots.find {
					it.getAttribute('idref') == pivot.getAttribute('idref')
				}

				if (firstSeen == null) {
					// not the same pivot
					seenPivots.add(pivot)
					continue
				}

				// duplicate pivot: move all its sibling variables next to the first one
				def target = firstSeen.getParentNode()
				def toMove = toList(pivot.getParentNode().getElementsByTagName('variable'))
				for (def moved : toMove) {
					target.appendChild(moved)
				}

				// remove duplicates, keeping the first occurrence of each variable
				def keptVariables = []
				for (def variable : toList(target.getElementsByTagName('variable'))) {
					if (keptVariables.any { isDuplicate(it, variable) }) {
						variable.getParentNode().removeChild(variable)
					}
					else {
						keptVariables.add(variable)
					}
				}
			}

			// Tidying up: remove empty <match /> nodes
			for (def matchElement : toList(matchesElement.getElementsByTagName('match'))) {
				if (toList(matchElement.getElementsByTagName('variable')).isEmpty()) {
					matchesElement.removeChild(matchElement)
				}
			}
		}

		return theDOM
	}
455

  
456
	/**
	 * Prints the command line help to the standard output, one line at a time.
	 */
	public static void usage() {
		def helpLines = [
			'concordance_blocks [OPTIONS] inputfile.xml [outputfile.csv]',
			'OPTIONS',
			'-h, --help           Displays this message.',
			'-c --context [length] Sets the number of words in context.'
		]
		for (String helpLine : helpLines) {
			println helpLine
		}
	}
462

  
463
	/**
	 * Command line entry point: builds the concordance of args[0] (Tiger-XML result
	 * file) into args[1] (CSV file) with a context of 30 words, the 'cat' non-terminal
	 * type and the 'pos' terminal type.
	 *
	 * NOTE(review): the options advertised by usage() (-h, -c) are not parsed here, and
	 * the output file is required by this code although usage() shows it as optional.
	 *
	 * @param args the input XML file path followed by the output CSV file path
	 */
	static main(args) {
		// Both paths are read below; without this guard a missing argument printed the
		// usage (or nothing) and then crashed with an ArrayIndexOutOfBoundsException.
		if (args == null || args.length < 2) {
			usage()
			return
		}

		int cx = 30;
		def ntTypes = ['cat'];
		def tTypes = ['pos'];
		File xmlfile = new File(args[0])
		File outfile = new File(args[1])

		ConcordanceBlocks p = new ConcordanceBlocks();
		println "START"
		p.process(xmlfile, outfile, cx, ntTypes, tTypes)
		println "END"
	}
476
}
tmp/org.txm.tigersearch.rcp/src/org/txm/export/ts/PunctInject.groovy (revision 713)
1
// Script to restore punctuation etc. to results of TS query.
2
// Inputs:
3
// --- Tiger-XML Document node
4
// --- Java array:
5
// --- --- String [index][type] where:
6
// --- --- --- type == 0 gives the xml:id
7
// --- --- --- type == 1 gives the word form
8
// Process:
9
// --- Injects punctuation.
10
// Returns:
11
// --- Tiger-XML Document node.
12
package org.txm.export.ts;
13

  
14
import javax.xml.parsers.DocumentBuilderFactory
15
import org.txm.searchengine.cqp.CqpDataProxy;
16
import org.txm.searchengine.cqp.corpus.Corpus
17
import org.txm.searchengine.cqp.corpus.CorpusManager;
18
import org.txm.searchengine.cqp.corpus.QueryResult;
19
import org.txm.searchengine.cqp.corpus.query.Query
20

  
21
/**
 * Restores into a Tiger-XML document the words (typically punctuation) that are listed
 * in a TXM id/word table but have no 't' node in the document.
 *
 * The work is done in two phases: missing words are first inserted as new 't' nodes
 * next to the existing ones, then the punctuation found at the very beginning of a
 * 'terminals' element is moved to the end of the previous 'terminals' element when it
 * closes the previous sentence.
 */
public class PunctInject
{
	/** A word form containing one of these characters is treated as punctuation in phase one. */
	private static final PUNCT_OR_BRACKET = ~/[\,\.\?\!\:\;\(\)\[\]\{\}]/

	/**
	 * A letter or a digit. A word form without any is treated as punctuation in phase two.
	 * The former class 'A-z' also covered the characters [ \ ] ^ _ and the backtick, which
	 * made a lone ']' look like a word although it is listed as closing punctuation below.
	 */
	private static final LETTER_OR_DIGIT = ~/[A-Za-zÀ-ÿ0-9]/

	/** Punctuation marks that belong to the end of the previous sentence. */
	private static final CLOSING_PUNCT = ~/[\.\,\;\:\!\?\)\]\}»”’]/

	/** Two consecutive straight quotes: the first closes a quotation, the second opens one. */
	private static final STRAIGHT_QUOTE_PAIR = ~/(""|'')/

	/**
	 * Injects the missing words of txmIdWordTable into tigerXml.
	 *
	 * @param tigerXml a DOM node giving access to the 't' and 'terminals' elements;
	 *        it is modified in place
	 * @param txmIdWordTable a list or array of entries where entry[0] is the word id
	 *        and entry[1] the word form; it is read by index and left untouched
	 * @return tigerXml
	 */
	public process (def tigerXml, def txmIdWordTable) {

		def allTs = toList(tigerXml.getElementsByTagName('t'))

		// Without any 't' node there is no anchor to insert words next to and no
		// attribute list or id prefix to copy: leave the document as it is.
		if (!allTs) {
			return tigerXml
		}

		injectMissingWords(allTs, txmIdWordTable)
		moveLeadingPunctuation(tigerXml)

		return tigerXml
	}

	/** First phase: walks the word table and the 't' nodes in parallel and inserts the words that have no node. */
	private void injectMissingWords(def allTs, def txmIdWordTable) {

		def tAttrs = getTAttrNames(allTs[0])
		def tNodeIdPrefix = allTs[0].getAttribute('id').tokenize('#').first()
		def addedWordIds = new HashSet()

		int wordCount = (txmIdWordTable == null) ? 0 : txmIdWordTable.size()
		int wordIx = 0	// next word to place; only advanced once the word is dealt with
		int tIx = 0	// next 't' node to compare the word against

		while (wordIx < wordCount) {

			def word = txmIdWordTable[wordIx]

			if (tIx == allTs.size()) {
				// End of TS file, but still words left in the BFM file.
				addTNode(word, allTs.last(), tAttrs, tNodeIdPrefix, 'append')
				wordIx++
				continue
			}

			def tNode = allTs[tIx]
			def tId = getTNodeId(tNode)

			if (tId == word[0]) {
				// Same id: the word is already in the document. Only report it when
				// the word forms differ.
				if (tNode.getAttribute('word') != word[1]) {
					println("Mismatched Ids! ($tId)")
				}
				tIx++
				wordIx++
			}
			else if (['#', '*'].contains(tNode.getAttribute('word'))) {
				// SRCMF duplicata; try comparing word against the next tNode next time
				// around.
				tIx++
			}
			// Check that the SRCMF corpus doesn't have a bug in it...
			// subList(0, tIx) is empty for the first node, whereas the former range
			// 0..tIx-1 became 0..-1 (the whole list) and always matched the node itself.
			else if ( !(word[1] =~ PUNCT_OR_BRACKET)
			&& allTs.subList(0, tIx).any { it.getAttribute('id') == "$tNodeIdPrefix#$tId" }) {
				println "Warning: word ${tNode.getAttribute('word')}, id $tId appears twice in corpus!"
				tIx++
			}
			// Check that there's not an extra word in the SRCMF corpus (rare, usu. a tokenisation change)
			else if ( !(word[1] =~ PUNCT_OR_BRACKET)
			&& allTs.subList(tIx, allTs.size()).any { it.getAttribute('id') == "$tNodeIdPrefix#${word[0]}" }) {
				println "Warning: word ${tNode.getAttribute('word')}, id $tId does not appear in BFM!"
				tIx++
			}
			else if (addedWordIds.contains(tId)) {
				println "Warning: word ${tNode.getAttribute('word')}, id ${tId} out of sequence in SRCMF corpus!"
				tIx++
			}
			else {
				// Insert word.  In the first instance, it will have the same parent as
				// the tNode before which it's being inserted.
				addTNode(word, tNode, tAttrs, tNodeIdPrefix, 'before')
				addedWordIds.add(word[0])
				wordIx++
			}
		}
	}

	/**
	 * Second phase: moves the punctuation that starts a 'terminals' element into the
	 * previous 'terminals' element, dependent on sequence.
	 */
	private void moveLeadingPunctuation(def tigerXml) {

		def allTerminalses = toList(tigerXml.getElementsByTagName('terminals'))

		for (int i = 1 ; i < allTerminalses.size() ; i++) {

			// Leading run of tokens that contain neither a letter nor a digit.
			def puncStack = []
			for (def t : toList(allTerminalses[i].getElementsByTagName('t'))) {
				if (t.getAttribute('word') =~ LETTER_OR_DIGIT) {
					break
				}
				puncStack.add(t)
			}

			if (!puncStack) {
				continue
			}

			// One character per token, so that an index in puncString is also an index
			// in puncStack. An empty word form is represented by a blank.
			def puncString = puncStack.collect {
				def form = it.getAttribute('word')
				form ? form[0] : ' '
			}.join('')

			int moveLeft = 0

			// First, identify LAST instance of sentence-final punctuation.
			def matches = puncString =~ CLOSING_PUNCT
			if (matches.size() > 0) {
				moveLeft = puncString.lastIndexOf(matches[-1]) + 1
			}

			// Second, split pairs of straight quotes
			matches = puncString =~ STRAIGHT_QUOTE_PAIR
			if (matches.size() > 0) {
				moveLeft = [moveLeft, puncString.lastIndexOf(matches[-1][0]) + 1].max()
			}

			// Now, move moveLeft punctuation nodes to the end of the prev. sentence
			for (int j = 0 ; j < moveLeft ; j++) {
				allTerminalses[i - 1].appendChild(puncStack[j])
			}
		}
	}

	/**
	 * Creates a 't' element for word and attaches it next to tNode.
	 *
	 * The new element gets every attribute named in tAttrs: 'id' is built from
	 * tNodeIdPrefix and word[0], 'word' is word[1], any other attribute is set to '--'.
	 *
	 * @param where 'before' to insert the new element just before tNode, 'append' to add
	 *        it at the end of tNode's parent
	 * @throws IllegalArgumentException if where is neither 'before' nor 'append'
	 */
	private addTNode(word, tNode, tAttrs, tNodeIdPrefix, where) {

		if (where != 'before' && where != 'append') {
			throw new IllegalArgumentException("Bad 'where' value: " + where)
		}

		def newTNode = tNode.getOwnerDocument().createElement('t')

		for (def anAttr : tAttrs) {
			switch (anAttr) {
				case 'id':
					newTNode.setAttribute('id', "${tNodeIdPrefix}#${word[0]}")
					break
				case 'word':
					newTNode.setAttribute('word', word[1])
					break
				default:
					newTNode.setAttribute(anAttr, '--')
			}
		}

		if (where == 'before') {
			tNode.getParentNode().insertBefore(newTNode, tNode)
		}
		else {
			tNode.getParentNode().appendChild(newTNode)
		}
	}

	/**
	 * @param tNode a DOM element
	 * @return the list of the attribute names of tNode
	 */
	public getTAttrNames(tNode) {

		def nodeMap = tNode.attributes
		def nameList = []

		for ( def i = 0 ; i < nodeMap.getLength() ; i++ ) {
			nameList.add( nodeMap.item(i).nodeName )
		}

		return nameList
	}

	/**
	 * @param tNode a DOM element
	 * @return the part of the 'id' attribute of tNode that follows the last '#'
	 */
	public getTNodeId(tNode) {
		return tNode.getAttribute('id').tokenize('#').last()
	}

	/**
	 * Copies the elements of an iterable object (here: DOM node lists) into a new
	 * collection that is not affected by later changes of the document.
	 */
	public def toList(def iterable) {
		return iterable.findAll {true};
	}

	/**
	 * Reads the id and the word form of every position of a corpus.
	 *
	 * @param corpusname the name of the corpus to read
	 * @param query not used
	 * @return one {id, word form} array per position whose id starts with "w"
	 */
	public static def getWords(String corpusname, String query)
	{
		CorpusManager cm = CorpusManager.getCorpusManager();
		Corpus corpus = cm.getCorpus(corpusname);
		def word_property = corpus.getProperty("word")
		def id_property = corpus.getProperty("id")

		def wordCache = cm.getCorpusProxies(corpus).get(word_property);
		def idCache = cm.getCorpusProxies(corpus).get(id_property);

		def positions = new int[corpus.getSize()];
		for(int i = 0 ; i< corpus.getSize() ; i++)
			positions[i] = i;
		def word_values = wordCache.getData(positions)
		def id_values = idCache.getData(positions)

		ArrayList<String[]> words = new ArrayList<String[]>(corpus.getSize());
		for(int p : positions)
		{
			if(id_values[p].startsWith("w"))
			{
				// Build the entry before adding it: the result list is shorter than
				// the corpus as soon as one position is skipped, so it must not be
				// indexed with the corpus position.
				words.add([id_values[p], word_values[p]] as String[])
			}
		}

		return words;
	}

	/**
	 * Manual test: injects a fixed word list into the Tiger-XML file given as first
	 * argument and prints the resulting DOM node.
	 */
	public static void main(String[] args)
	{
		if (!args) {
			println 'usage: PunctInject tigerfile.xml'
			return
		}

		def words = [
			["w203_1", "Dominedeu"],
			["w203_2", "devemps"],
			["w203_3", "lauder"],
			["w203_4", "et"],
			["w203_5", "a"],
			["w203_6", "sus"],
			["w203_7", "sancz"],
			["w203_8", "honor"],
			["w203_9", "porter"],
			["w203_10", "»"],
			["w203_10.2", ")"],
			["w203_10.3", '.'],
			["w203_10.5", '"'],
			["w203_10.7", '"'],
			["w203_11", "in"],
			["w203_12", "su'"],
			["w203_13", "amor"],
			["w203_14", "cantomps"],
			["w203_15", "del"],
			["w203_16", "·sanz"],
			["w203_17", "quae"],
			["w203_18", "por"],
			["w203_19", "lui"],
			["w203_20", "augrent"],
			["w203_21", "granz"],
			["w203_22", "aanz"],
			["w203_23", "."],
		];
		File tigerXml = new File(args[0]);
		def factory = DocumentBuilderFactory.newInstance()
		factory.setXIncludeAware(true)
		def builder = factory.newDocumentBuilder()
		def THEDOM = builder.parse(tigerXml).documentElement

		println THEDOM.getClass()

		def NEWDOM = new PunctInject().process(THEDOM, words);

		println NEWDOM
	}
}
tmp/org.txm.tigersearch.rcp/src/org/txm/function/tigersearch/TIGERSearch.java (revision 713)
22 22

  
23 23
public class TIGERSearch extends TXMResult implements IAdaptable {
24 24

  
25
	TSCorpusManager manager;
26
	TSCorpus tscorpus;
27
	boolean ready = false;
28
	private TSResult tsresult;
29
	String T, NT;
30
	private MainCorpus corpus;
31
	private String query;
25
	protected TSCorpusManager manager;
26
	protected TSCorpus tscorpus;
27
	protected boolean ready = false;
28
	protected TSResult tsresult;
29
	protected String T, NT;
30
	protected MainCorpus corpus;
31
	protected String query;
32 32

  
33
	public MainCorpus getCorpus() {
34
		return corpus;
35
	}
36
	
33 37
	public TIGERSearch(Corpus corpus) {
34 38
		super(corpus);
35 39
		this.corpus = corpus.getMainCorpus();
......
166 170
		@Override
167 171
		public String getLabel(Object o) {
168 172
			String q = ((TIGERSearch) o).getQuery();
169
			return q.substring(0, Math.min(10, q.length())).replaceAll("\n", "")+"...";
173
			if (q != null) {
174
				return q.substring(0, Math.min(10, q.length())).replaceAll("\n", "")+"...";
175
			} else {
176
				return ((TIGERSearch) o).getCorpus().getName();
177
			}
170 178
		}
171 179

  
172 180
		@Override
......
243 251

  
244 252
	@Override
245 253
	public boolean canCompute() {
246
		return corpus != null && query.length() > 0;
254
		return corpus != null && query != null && query.length() > 0;
247 255
	}
248 256

  
249 257
	@Override
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TSResult.java (revision 713)
281 281
	public static String CONCMOTPIVOT = "concordance_mot-pivot"; //$NON-NLS-1$
282 282
	public static String CONCBLOCKS = "concordance_blocks"; //$NON-NLS-1$
283 283
	public static String[] EXPORTMETHODS = {CONCSIMPLE, CONCMOTPIVOT, CONCBLOCKS};
284
	public boolean toConcordance(File csvFile, String method, int cx, ArrayList<String> ntTypes, ArrayList<String> tTypes, boolean punct) throws Exception
284
	public boolean toConcordance(File csvFile, String method, int cx, List<String> list, List<String> list2, boolean punct) throws Exception
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff