Revision 399

tmp/org.txm.libs.cqp/META-INF/MANIFEST.MF (revision 399)
4 4
Bundle-SymbolicName: org.txm.libs.cqp;singleton:=true
5 5
Bundle-Version: 1.0.0.qualifier
6 6
Bundle-Activator: org.txm.libs.cqp.Activator
7
Require-Bundle: org.eclipse.core.runtime,
8
 org.txm.core,
9
 org.eclipse.ui.workbench,
10
 org.eclipse.jface
7
Require-Bundle: org.eclipse.core.runtime;visibility:=reexport,
8
 org.txm.core;visibility:=reexport,
9
 org.eclipse.jface;visibility:=reexport,
10
 org.eclipse.ui.workbench;visibility:=reexport
11 11
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
12 12
Bundle-ActivationPolicy: lazy
13 13
Export-Package: org.txm.libs.cqp
tmp/org.txm.searchengine.core/build.properties (revision 399)
1 1
source.. = src/
2 2
output.. = bin/
3 3
bin.includes = META-INF/,\
4
               .
4
               .,\
5
               plugin.xml
tmp/org.txm.searchengine.core/plugin.xml (revision 399)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<?eclipse version="3.4"?>
3
<plugin>
4
   <extension-point id="org.txm.searchengine.core.SearchEngine" name="Search Engine" schema="schema/org.txm.searchengine.exsd"/>
5
   <extension
6
         point="org.txm.EnginesManager">
7
      <EngineManager
8
            class="org.txm.searchengine.core.SearchEngines"
9
            description="Manage the search engines"
10
            name="org.txm.searchengine.core.SearchEngines">
11
      </EngineManager>
12
   </extension>
13

  
14
</plugin>
0 15

  
tmp/org.txm.searchengine.core/META-INF/MANIFEST.MF (revision 399)
1 1
Manifest-Version: 1.0
2 2
Bundle-ManifestVersion: 2
3 3
Bundle-Name: Searchengine core
4
Bundle-SymbolicName: org.txm.searchengine.core
4
Bundle-SymbolicName: org.txm.searchengine.core;singleton:=true
5 5
Bundle-Version: 1.0.0.qualifier
6 6
Bundle-Activator: org.txm.searchengine.core.Activator
7 7
Require-Bundle: org.eclipse.core.runtime,
8
 org.txm.core;bundle-version="0.7.0"
8
 org.txm.core;bundle-version="0.7.0",
9
 org.txm.utils
9 10
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
10 11
Bundle-ActivationPolicy: lazy
11 12
Export-Package: org.txm.searchengine.core
tmp/org.txm.searchengine.core/schema/org.txm.searchengine.exsd (revision 399)
1
<?xml version='1.0' encoding='UTF-8'?>
2
<!-- Schema file written by PDE -->
3
<schema targetNamespace="org.txm.searchengine.core" xmlns="http://www.w3.org/2001/XMLSchema">
4
<annotation>
5
      <appinfo>
6
         <meta.schema plugin="org.txm.searchengine.core" id="org.txm.searchengine.core.SearchEngine" name="Search Engine"/>
7
      </appinfo>
8
      <documentation>
9
         [Enter description of this extension point.]
10
      </documentation>
11
   </annotation>
12

  
13
   <element name="extension">
14
      <annotation>
15
         <appinfo>
16
            <meta.element />
17
         </appinfo>
18
      </annotation>
19
      <complexType>
20
         <choice minOccurs="1" maxOccurs="unbounded">
21
            <element ref="SearchEngine"/>
22
         </choice>
23
         <attribute name="point" type="string" use="required">
24
            <annotation>
25
               <documentation>
26
                  
27
               </documentation>
28
            </annotation>
29
         </attribute>
30
         <attribute name="id" type="string">
31
            <annotation>
32
               <documentation>
33
                  
34
               </documentation>
35
            </annotation>
36
         </attribute>
37
         <attribute name="name" type="string">
38
            <annotation>
39
               <documentation>
40
                  
41
               </documentation>
42
               <appinfo>
43
                  <meta.attribute translatable="true"/>
44
               </appinfo>
45
            </annotation>
46
         </attribute>
47
      </complexType>
48
   </element>
49

  
50
   <element name="SearchEngine">
51
      <complexType>
52
         <attribute name="class" type="string">
53
            <annotation>
54
               <documentation>
55
                  
56
               </documentation>
57
               <appinfo>
58
                  <meta.attribute kind="java" basedOn="org.txm.searchengine.core.SearchEngine:"/>
59
               </appinfo>
60
            </annotation>
61
         </attribute>
62
      </complexType>
63
   </element>
64

  
65
   <annotation>
66
      <appinfo>
67
         <meta.section type="since"/>
68
      </appinfo>
69
      <documentation>
70
         [Enter the first release in which this extension point appears.]
71
      </documentation>
72
   </annotation>
73

  
74
   <annotation>
75
      <appinfo>
76
         <meta.section type="examples"/>
77
      </appinfo>
78
      <documentation>
79
         [Enter extension point usage example here.]
80
      </documentation>
81
   </annotation>
82

  
83
   <annotation>
84
      <appinfo>
85
         <meta.section type="apiinfo"/>
86
      </appinfo>
87
      <documentation>
88
         [Enter API information here.]
89
      </documentation>
90
   </annotation>
91

  
92
   <annotation>
93
      <appinfo>
94
         <meta.section type="implementation"/>
95
      </appinfo>
96
      <documentation>
97
         [Enter information about supplied implementation of this extension point.]
98
      </documentation>
99
   </annotation>
100

  
101

  
102
</schema>
0 103

  
tmp/org.txm.searchengine.core/src/org/txm/searchengine/core/SearchEngines.java (revision 399)
6 6
import org.txm.Engine;
7 7
import org.txm.EngineType;
8 8
import org.txm.EnginesManager;
9
import org.txm.Toolbox;
9 10

  
10 11
public class SearchEngines extends EnginesManager<SearchEngine> {
11 12

  
12 13
	@Override
13 14
	public boolean startEngines() {
15
		
14 16
		for (Engine e : engines.values()) {
15 17
			SearchEngine se = (SearchEngine)e;
16
			System.out.println("Starting searchengine: "+ se.getName());
18
			System.out.println("Starting "+ se.getName()+" searchengine.");
19
			se.start();
17 20
		}
18
		return false;
21
		return true;
19 22
	}
20 23

  
21 24
	@Override
22 25
	public boolean stopEngines() {
23
		// TODO Auto-generated method stub
24
		return false;
26
		for (Engine e : engines.values()) {
27
			SearchEngine se = (SearchEngine)e;
28
			System.out.println("Stoping "+ se.getName()+" searchengine.");
29
			se.stop();
30
		}
31
		return true;
25 32
	}
26 33

  
27 34
	@Override
......
30 37
	}
31 38

  
32 39
	public static SearchEngine getCQPEngine() {
33
		return (SearchEngine) engines.get("CQP");
40
		return (SearchEngine) Toolbox.getEngineManager(EngineType.SEARCH).getEngine("CQP");
34 41
	}
35 42

  
36 43
	public static SearchEngine getTIGERSearchEngine() {
37
		return (SearchEngine) engines.get("TIGERSearch");
44
		return (SearchEngine) Toolbox.getEngineManager(EngineType.SEARCH).getEngine("TIGERSearch");
38 45
	}
39 46

  
40 47
	@Override
41 48
	protected boolean fetchEngines() {
49
		
42 50
		IConfigurationElement[] contributions = Platform.getExtensionRegistry().getConfigurationElementsFor(SearchEngine.EXTENSION_POINT_ID);
43

  
51
		//System.out.println("search engine contributions: "+SearchEngine.EXTENSION_POINT_ID);
44 52
		for (int i = 0; i < contributions.length; i++) {
45 53
			try {
46 54
				SearchEngine e = (SearchEngine)contributions[i].createExecutableExtension("class"); //$NON-NLS-1$
47
				// register something with the SearchEngine ?
48
				engines.put(e.getName(), e);
55
				if (e.initialize()) {
56
					engines.put(e.getName(), e);
57
				} else {
58
					System.out.println("Fail to initialize "+e.getName()+" search engine.");
59
				}
49 60
			} catch(CoreException e) {
61
				System.out.println("Fail instanciate "+contributions[i].getName()+": "+e.getLocalizedMessage());
50 62
				e.printStackTrace();
51 63
			} 
52 64
		}
53
		return false;
65
		
66
		return engines.size() > 0;
54 67
	}
55 68
}
tmp/CQP/.project (revision 399)
1 1
<?xml version="1.0" encoding="UTF-8"?>
2 2
<projectDescription>
3
	<name>CQP</name>
3
	<name>org.txm.searchengine.cqp.core</name>
4 4
	<comment></comment>
5 5
	<projects>
6 6
	</projects>
tmp/CQP/src/org/txm/test/TestSubcorpus.java (revision 399)
1
package org.txm.test;
2

  
3
import java.io.File;
4
import java.io.FileWriter;
5
import java.io.IOException;
6
import java.io.PrintWriter;
7
import java.util.Arrays;
8

  
9
import org.txm.searchengine.cqp.NetCqiClient;
10
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException;
11
import org.txm.searchengine.cqp.corpus.Corpus;
12
import org.txm.searchengine.cqp.corpus.CorpusManager;
13
import org.txm.searchengine.cqp.corpus.Subcorpus;
14
import org.txm.searchengine.cqp.corpus.query.Query;
15
import org.txm.searchengine.cqp.serverException.CqiServerError;
16

  
17
public class TestSubcorpus {
18

  
19
	public static String CORPUS = "DISCOURS"; //$NON-NLS-1$
20

  
21
	public static boolean allTests(File outdir) {
22
		TestSubcorpus tester = new TestSubcorpus();
23
		return tester.test1(outdir) && tester.test2(outdir) && tester.test3(outdir);
24
	}
25

  
26
	public boolean test1(File outdir) {
27
		try {
28
			Corpus corpus = CorpusManager.getCorpusManager().getCorpus(CORPUS);
29
			Subcorpus sub = corpus.createSubcorpus(new Query("\"je\" expand to p"), "JEP"); //$NON-NLS-1$ //$NON-NLS-2$
30

  
31
			writePositions(sub, new File(outdir, TestSubcorpus.class.getName()+"_test1.csv")); //$NON-NLS-1$
32
		} catch (Exception e) {
33
			org.txm.utils.logger.Log.printStackTrace(e);
34
			return false;
35
		}
36
		return true;
37
	}
38

  
39
	public boolean test2(File outdir) {
40
		try {
41
			Corpus corpus = CorpusManager.getCorpusManager().getCorpus(CORPUS);
42
			Subcorpus sub = corpus.createSubcorpus(corpus.getStructuralUnit("text"), corpus.getStructuralUnit("text").getProperty("loc"), "De_Gaulle", "DG"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
43

  
44
			writePositions(sub, new File(outdir, TestSubcorpus.class.getName()+"_test2.csv")); //$NON-NLS-1$
45
		} catch (Exception e) {
46
			org.txm.utils.logger.Log.printStackTrace(e);
47
			return false;
48
		}
49
		return true;
50
	}
51

  
52
	public boolean test3(File outdir) {
53
		try {
54
			Corpus corpus = CorpusManager.getCorpusManager().getCorpus(CORPUS);
55
			Subcorpus sub = corpus.createSubcorpus(corpus.getStructuralUnit("text"), corpus.getStructuralUnit("text").getProperty("type"), Arrays.asList("Allocution radiotélévisée", "Conférence de presse"), "alloc+presse"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$
56

  
57
			writePositions(sub, new File(outdir, TestSubcorpus.class.getName()+"_test3.csv")); //$NON-NLS-1$
58
		} catch (Exception e) {
59
			org.txm.utils.logger.Log.printStackTrace(e);
60
			return false;
61
		}
62
		return true;
63
	}
64

  
65
	private void writePositions(Subcorpus sub, File outfile) throws UnexpectedAnswerException, IOException, CqiServerError {
66
		int[] positions = CorpusManager.getCorpusManager().getCqiClient().dumpSubCorpus(sub.getQualifiedCqpId(), NetCqiClient.CQI_CONST_FIELD_MATCH, 0, sub.getNMatch());
67
		PrintWriter writer = new PrintWriter(new FileWriter(outfile));
68
		for(int i : positions) writer.println(i);
69
		writer.close();
70
	}
71
}
0 72

  
tmp/CQP/src/org/txm/test/package.html (revision 399)
1
<html>
2
<body>
3
<p>First draft of tests for TXM</p>
4
</body>
5
</html>
0 6

  
tmp/CQP/src/org/txm/importer/cwb/package.html (revision 399)
1
<html>
2
<body>
3
<p>CWB utils. Wraps cwb-encode and cwb-makeall</p>
4
</body>
5
</html>
0 6

  
tmp/CQP/src/org/txm/importer/cwb/CwbDecode.java (revision 399)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun., 06 mai 2013) $
25
// $LastChangedRevision: 2386 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.cwb;
29

  
30
import java.util.Date;
31
import java.util.List;
32
import java.util.Locale;
33
import java.text.DateFormat;
34
import java.io.BufferedReader;
35
import java.io.IOException;
36
import java.io.InputStream;
37
import java.io.InputStreamReader;
38
import java.util.ArrayList;
39

  
40
// Decode CWB indexes
41
/**
 * Wrapper around the {@code cwb-decode} command line tool.
 * <p>
 * Each {@code set...}/{@code unset...} pair switches one command line option
 * on or off. {@link #cwbdecode(String)} assembles the command line from the
 * options currently switched on, runs it and echoes its standard output.
 */
public class CwbDecode {

	/** Prefix prepended verbatim to the executable name (no separator is added). */
	String binpath = "";

	/** The version. */
	String version = "0.0.0";

	/** The desc. */
	String desc = "Decode CWB indexes";

	/** Debug flag, stored by {@link #debug(boolean)}; not read inside this class. */
	boolean debug = false;

	/** Extra arguments appended after every other option; may be null. */
	String[] options;

	/**
	 * Instantiates a new cwb decode.
	 *
	 * @param binpath prefix of the executable path; the executable name is appended to it as is
	 */
	public CwbDecode(String binpath) {
		this.binpath = binpath;
	}

	/**
	 * Sets the debug flag.
	 *
	 * @param b the new value of the flag
	 */
	public void debug(boolean b) {
		debug = b;
	}

	/** -L: Lisp output mode. */
	private boolean isL = false;

	/** Switches the -L option on. */
	public void setL() {
		this.isL = true;
	}

	/** Switches the -L option off. */
	public void unsetL() {
		this.isL = false;
	}

	/** -H: concordance line ('horizontal') output mode. */
	private boolean isH = false;

	/** Switches the -H option on. */
	public void setH() {
		this.isH = true;
	}

	/** Switches the -H option off. */
	public void unsetH() {
		this.isH = false;
	}

	/** -C: compact output mode (suitable for encode). */
	private boolean isC = false;

	/** Switches the -C option on. */
	public void setC() {
		this.isC = true;
	}

	/** Switches the -C option off. */
	public void unsetC() {
		this.isC = false;
	}

	/** -Cx: XML-compatible compact output (for {encode -x ...}). */
	private boolean isCx = false;

	/** Switches the -Cx option on. */
	public void setCx() {
		this.isCx = true;
	}

	/** Switches the -Cx option off. */
	public void unsetCx() {
		this.isCx = false;
	}

	/** -X: XML output mode. */
	private boolean isX = false;

	/** Switches the -X option on. */
	public void setX() {
		this.isX = true;
	}

	/** Switches the -X option off. */
	public void unsetX() {
		this.isX = false;
	}

	/** -n: show corpus position ('numbers'). */
	private boolean isn = false;

	/** Switches the -n option on. */
	public void setn() {
		this.isn = true;
	}

	/** Switches the -n option off. */
	public void unsetn() {
		this.isn = false;
	}

	/** -s: first token to print (at corpus position {n}). */
	private boolean iss = false;

	/** Value passed with -s. */
	private int s;

	/**
	 * Switches the -s option on.
	 *
	 * @param arg the corpus position of the first token to print
	 */
	public void sets(int arg) {
		this.iss = true;
		this.s = arg;
	}

	/** Switches the -s option off. */
	public void unsets() {
		this.iss = false;
	}

	/** -e: last token to print (at corpus position {n}). */
	private boolean ise = false;

	/** Value passed with -e. */
	private int e;

	/**
	 * Switches the -e option on.
	 *
	 * @param arg the corpus position of the last token to print
	 */
	public void sete(int arg) {
		this.ise = true;
		this.e = arg;
	}

	/** Switches the -e option off. */
	public void unsete() {
		this.ise = false;
	}

	/** -r: set registry directory. */
	private boolean isr = false;

	/** Value passed with -r. */
	private String r;

	/**
	 * Switches the -r option on.
	 *
	 * @param arg the registry directory
	 */
	public void setr(String arg) {
		this.isr = true;
		this.r = arg;
	}

	/** Switches the -r option off. */
	public void unsetr() {
		this.isr = false;
	}

	/** -p: matchlist mode (input from stdin). */
	private boolean isp = false;

	/** Switches the -p option on. */
	public void setp() {
		this.isp = true;
	}

	/** Switches the -p option off. */
	public void unsetp() {
		this.isp = false;
	}

	/** -f: matchlist mode (input from {file}). */
	private boolean isf = false;

	/** Value passed with -f. */
	private String f;

	/**
	 * Switches the -f option on.
	 *
	 * @param arg the matchlist file
	 */
	public void setf(String arg) {
		this.isf = true;
		this.f = arg;
	}

	/** Switches the -f option off. */
	public void unsetf() {
		this.isf = false;
	}

	/** -h: the tool's help page. */
	private boolean ish = false;

	/** Switches the -h option on. */
	public void seth() {
		this.ish = true;
	}

	/** Switches the -h option off. */
	public void unseth() {
		this.ish = false;
	}

	/** -P: print p-attribute {att}. */
	private boolean isP = false;

	/** Attributes passed with -P, one -P per element. */
	private List<String> P;

	/**
	 * Switches the -P option on.
	 *
	 * @param arg the p-attributes to print
	 */
	public void setP(List<String> arg) {
		this.isP = true;
		this.P = arg;
	}

	/** Switches the -P option off. */
	public void unsetP() {
		this.isP = false;
	}

	/** -S: print s-attribute {att} (possibly including annotations). */
	private boolean isS = false;

	/** Attributes passed with -S, one -S per element. */
	private List<String> S;

	/**
	 * Switches the -S option on.
	 *
	 * @param arg the s-attributes to print
	 */
	public void setS(List<String> arg) {
		this.isS = true;
		this.S = arg;
	}

	/** Switches the -S option off. */
	public void unsetS() {
		this.isS = false;
	}

	/** -V: show s-attribute annotation for each range in matchlist mode. */
	private boolean isV = false;

	/** Attributes passed with -V, one -V per element. */
	private List<String> V;

	/**
	 * Switches the -V option on.
	 *
	 * @param arg the s-attributes whose annotation is shown
	 */
	public void setV(List<String> arg) {
		this.isV = true;
		this.V = arg;
	}

	/** Switches the -V option off. */
	public void unsetV() {
		this.isV = false;
	}

	/** -A: print alignment attribute {att}. */
	private boolean isA = false;

	/** Attributes passed with -A, one -A per element. */
	private List<String> A;

	/**
	 * Switches the -A option on.
	 *
	 * @param arg the alignment attributes to print
	 */
	public void setA(List<String> arg) {
		this.isA = true;
		this.A = arg;
	}

	/** Switches the -A option off. */
	public void unsetA() {
		this.isA = false;
	}

	/** -ALL: print all p-attributes and s-attributes. */
	private boolean isALL = false;

	/** Switches the -ALL option on. */
	public void setALL() {
		this.isALL = true;
	}

	/** Switches the -ALL option off. */
	public void unsetALL() {
		this.isALL = false;
	}

	/** -c: expand ranges to full {att} region (matchlist mode). */
	private boolean isc = false;

	/** Value passed with -c. */
	private String c;

	/**
	 * Switches the -c option on.
	 *
	 * @param arg the attribute whose regions the ranges are expanded to
	 */
	public void setc(String arg) {
		this.isc = true;
		this.c = arg;
	}

	/** Switches the -c option off. */
	public void unsetc() {
		this.isc = false;
	}

	/**
	 * Sets extra arguments appended verbatim at the end of the command line.
	 *
	 * @param options the extra arguments, or null for none
	 */
	public void setAdditionalOptions(String[] options) {
		this.options = options;
	}

	/**
	 * Runs cwb-decode on a corpus with the options currently switched on and
	 * copies the standard output of the process to {@code System.out}.
	 * A non-zero exit code is reported on {@code System.err}, followed by the
	 * command line on {@code System.out}.
	 *
	 * @param corpus the corpus name
	 * @throws IOException if the process cannot be started or its output cannot be read
	 */
	public void cwbdecode(String corpus) throws IOException {
		ArrayList<String> args = buildCommandLine(corpus);

		ProcessBuilder pb = new ProcessBuilder(args);
		pb.redirectErrorStream(false);
		// A failed launch is propagated: there is no process to read from, and
		// carrying on would only end in a NullPointerException.
		Process process = pb.start();

		// NOTE(review): the error stream of the process is never read; a tool
		// writing a lot to stderr could block once the pipe buffer is full.
		BufferedReader output = new BufferedReader(new InputStreamReader(process.getInputStream()));
		try {
			String line;
			while ((line = output.readLine()) != null) {
				System.out.println(line);
			}
		} finally {
			output.close();
		}

		int exitCode = 0;
		try {
			exitCode = process.waitFor();
		} catch (InterruptedException interrupted) {
			// keep the interruption visible to the caller instead of dropping it
			Thread.currentThread().interrupt();
		}
		if (exitCode != 0) {
			System.err.println("Process exited abnormally with code "
					+ exitCode
					+ " at "
					+ DateFormat.getDateInstance(DateFormat.FULL, Locale.UK)
					.format(new Date()));

			for (String arg : args) {
				System.out.print("" + arg + " ");
			}
			System.out.println();
		}
	}

	/**
	 * Assembles the command line: executable, simple options, corpus name,
	 * then -h, the attribute options, -ALL, -c and the additional options.
	 *
	 * @param corpus the corpus name
	 * @return the command line, executable first
	 */
	private ArrayList<String> buildCommandLine(String corpus) {
		ArrayList<String> args = new ArrayList<String>();
		if (System.getProperty("os.name").contains("Windows")) {
			args.add(binpath + "cwb-decode.exe");
		} else {
			args.add(binpath + "cwb-decode");
		}
		if (isL) args.add("-L");
		if (isH) args.add("-H");
		if (isC) args.add("-C");
		if (isCx) args.add("-Cx");
		if (isX) args.add("-X");
		if (isn) args.add("-n");
		if (iss) {
			args.add("-s");
			args.add("" + s);
		}
		if (ise) {
			args.add("-e");
			args.add("" + e);
		}
		if (isr) {
			args.add("-r");
			args.add("" + r);
		}
		if (isp) args.add("-p");
		if (isf) {
			args.add("-f");
			args.add("" + f);
		}
		args.add("" + corpus);
		if (ish) args.add("-h");
		if (isP) addRepeated(args, "-P", P);
		if (isS) addRepeated(args, "-S", S);
		if (isV) addRepeated(args, "-V", V);
		if (isA) addRepeated(args, "-A", A);
		if (isALL) args.add("-ALL");
		if (isc) {
			args.add("-c");
			args.add("" + c);
		}

		if (options != null) {
			for (String opt : options) {
				args.add(opt);
			}
		}
		return args;
	}

	/**
	 * Appends {@code flag value} once per value; a null list adds nothing.
	 *
	 * @param args the command line being built
	 * @param flag the option flag
	 * @param values the option values, or null
	 */
	private static void addRepeated(List<String> args, String flag, List<String> values) {
		if (values == null) {
			return;
		}
		for (String value : values) {
			args.add(flag);
			args.add("" + value);
		}
	}

	/**
	 * The main method; only instantiates a decoder with an empty binpath.
	 *
	 * @param args the arguments (unused)
	 */
	public static void main(String[] args) {
		CwbDecode tt = new CwbDecode("");
	}
}
0 599

  
tmp/CQP/src/org/txm/importer/cwb/FixMilestoneDeclarations.java (revision 399)
1
package org.txm.importer.cwb;
2

  
3
import java.io.BufferedReader;
4
import java.io.BufferedWriter;
5
import java.io.File;
6
import java.io.FileReader;
7
import java.io.FileWriter;
8
import java.io.IOException;
9
import java.util.ArrayList;
10
import java.util.HashMap;
11

  
12
/**
 * Remove milestone declarations and index files. Milestones are empty structures that cannot be used with CQL.
 * <p>
 * A structure is treated as a milestone when its ".rng" file in the data
 * directory exists and is empty.
 *
 * @author mdecorde
 *
 */
public class FixMilestoneDeclarations {
	/** The registry file to rewrite. */
	File registryFile;
	/** The directory holding the index files. */
	File dataDirectory;
	/** When true, progress messages are printed on System.out. */
	boolean debug = false;

	/** Extensions of the index files deleted for each attribute of a milestone. */
	public static final String[] exts = { ".avs", ".avx", ".rng"};

	/**
	 * @param registryFile the registry file declaring the structures
	 * @param dataDirectory the directory containing the index files
	 */
	public FixMilestoneDeclarations(File registryFile, File dataDirectory) {
		this.registryFile = registryFile;
		this.dataDirectory = dataDirectory;
	}

	/**
	 * Deletes the index files of the milestone structures and removes their
	 * "STRUCTURE" lines from the registry file. Declarations of structures whose
	 * ".rng" file is missing are removed from the registry as well.
	 * The registry is rewritten through a ".tmp" sibling file which then replaces it.
	 *
	 * @return false if the registry file or the data directory is missing or
	 *         unreadable, or if the registry file could not be replaced
	 * @throws IOException if reading or writing the registry fails
	 */
	public boolean process() throws IOException {
		if (!dataDirectory.exists()) return false;
		if (!registryFile.exists()) return false;

		// get structural units and their properties
		ArrayList<String> structures = new ArrayList<String>();
		HashMap<String, ArrayList<String>> structureAttributes = new HashMap<String, ArrayList<String>>();
		BufferedReader reader = new BufferedReader(new FileReader(registryFile));
		try {
			String structure = "";
			String line = reader.readLine();
			while (line != null) {
				if (line.startsWith("STRUCTURE ")) {
					if (!line.contains("_")) {
						// plain structure declaration: following "x_y" lines are its attributes
						structure = line.substring(10);
						structures.add(structure);
						structureAttributes.put(structure, new ArrayList<String>());
					} else {
						String attribute = line.replaceAll("[ ]+# \\[annotations\\]", "").substring(10);
						// an attribute line seen before any structure line has no owner: leave it alone
						ArrayList<String> attributes = structureAttributes.get(structure);
						if (attributes != null) {
							attributes.add(attribute);
						}
					}
				}
				line = reader.readLine();
			}
		} finally {
			reader.close();
		}
		if (debug) System.out.println("Structures: "+structures);

		// listFiles() is null when dataDirectory is not a readable directory
		if (dataDirectory.listFiles() == null) return false;

		//removing empty data files
		ArrayList<String> linesToRemove = new ArrayList<String>();
		for (String s : structures) {
			File f = new File(dataDirectory, s+".rng");

			if (f.exists() && f.length() == 0) {
				if (debug) System.out.println("Milestone detected : "+s);
				f.delete();
				linesToRemove.add(s);

				if (debug) System.out.println("Removing files: "+structureAttributes.get(s));
				for (String attribute : structureAttributes.get(s)) {
					linesToRemove.add(attribute);
					for (String ext : exts) {
						new File(dataDirectory, attribute+ext).delete();
					}
				}
			} else if (!f.exists()) {
				if (debug) System.out.println("Structure "+s+" RNG file is missing: "+f);
				linesToRemove.add(s);
			}
		}

		// remove structure declarations
		if (debug) System.out.println("Attributes to remove from registry files: "+linesToRemove);
		File registryFile_tmp = new File(registryFile.getAbsolutePath()+".tmp");
		reader = new BufferedReader(new FileReader(registryFile));
		try {
			BufferedWriter writer = new BufferedWriter(new FileWriter(registryFile_tmp));
			try {
				String line = reader.readLine();
				while (line != null) {
					boolean keep = true;
					if (line.startsWith("STRUCTURE ")) {
						String declared = line.substring(10).replaceAll("[ ]+# \\[annotations\\]", "");
						keep = !linesToRemove.contains(declared);
					}
					if (keep) {
						writer.write(line+"\n");
					} else if (debug) {
						System.out.println("Remove line: "+line);
					}
					line = reader.readLine();
				}
			} finally {
				writer.close();
			}
		} finally {
			reader.close();
		}

		return (registryFile.delete() && registryFile_tmp.renameTo(registryFile));
	}

	public static void main(String args[]) throws IOException {
		File registryFile = new File("/home/mdecorde/TXM/corpora/BAIP/registry/baip");
		File dataDirectory = new File("/home/mdecorde/TXM/corpora/BAIP/data/BAIP");
		FixMilestoneDeclarations fm = new FixMilestoneDeclarations(registryFile, dataDirectory);
		System.out.println("Result: "+fm.process());
	}
}
0 123

  
tmp/CQP/src/org/txm/importer/cwb/BuildAlignOut.java (revision 399)
1
package org.txm.importer.cwb;
2

  
3
import java.io.BufferedReader;
4
import java.io.File;
5
import java.io.IOException;
6
import java.io.PrintWriter;
7
import java.util.ArrayList;
8

  
9
import org.txm.utils.io.IOUtils;
10
/**
11
 * Create the CWB "align.out" file usually created with cwb-align using 2 WTC files. The alignement structure and property is not checked. Only structure order is used.
12
 * 
13
 * @author mdecorde
14
 *
15
 */
16
public class BuildAlignOut {
17
	
18
	File wtcFile1;
19
	File wtcFile2;
20
	
21
	public BuildAlignOut(File wtcFile1, File wtcFile2) {
22
		this.wtcFile1 = wtcFile1;
23
		this.wtcFile2 = wtcFile2;
24
	}
25
	
26
	public boolean process(File alignOutFile, String su, String sup) throws IOException {
27
		
28
		ArrayList<Integer> positions1 = getWordPositions(wtcFile1, su, sup);
29
		ArrayList<Integer> positions2 = getWordPositions(wtcFile2, su, sup);
30
		
31
		if (positions1.size() != positions2.size()) {
32
			System.out.println("Error: the number of segment differs. "+positions1.size()+" in "+wtcFile1+" and "+positions2.size()+" in "+wtcFile2);
33
			return false;
34
		}
35
		
36
		System.out.println("Writing "+alignOutFile.getName()+" with "+positions1.size()+" positions.");
37
		String encoding = "UTF-8";
38
		PrintWriter writer = IOUtils.getWriter(alignOutFile, encoding);
39
		
40
		String c1 = wtcFile1.getName().toLowerCase();
41
		c1 = c1.substring(0, c1.length()-4);
42
		String c2 = wtcFile2.getName().toLowerCase();
43
		c2 = c2.substring(0, c2.length()-4);
44
		
45
		writer.println(c1+"\t"+su+"\t"+c2+"\t"+su);
46
		for (int i = 0 ; i < positions1.size() ; i = i+2) {
47
			writer.println(""+positions1.get(i)+"\t"+positions1.get(i+1)+"\t"+positions2.get(i)+"\t"+positions2.get(i+1)+"\t1:1\t42");
48
		}
49
		writer.close();
50
		
51
		return true;
52
	}
53

  
54
	private static ArrayList<Integer> getWordPositions(File wtcFile, String su,
55
			String sup) throws IOException {
56
		
57
		BufferedReader reader = IOUtils.getReader(wtcFile);
58
		int position_counter = 0;
59
		ArrayList<Integer> positions1 = new ArrayList<Integer>();
60
		
61
		String pattern = " "+sup+"=\"";
62
		String line = reader.readLine();
63
		while (line != null) {
64
			if (line.startsWith("<"+su) && line.contains(pattern)) {
65
				// align structure !
66
				positions1.add(position_counter);
67
			} else if (line.startsWith("</"+su+">")) {
68
				// align structure !
69
				positions1.add(position_counter-1);
70
			}  else if (line.startsWith("<")) {
71
				// structure !
72
			} else if (line.length() == 0) {
73
				// empty line
74
			} else { // word line
75
				//System.out.println("WORD: "+line);
76
				position_counter++;
77
			}
78
			line = reader.readLine();
79
		}
80
		return positions1;
81
	}
82
	
83
	public static void main(String args[]) throws IOException {
84
		
85
//		PatchCwbRegistry.patchAlignment(new File("/home/mdecorde/TEMP/align/registry", "c1"), "c2");
86
//		PatchCwbRegistry.patchAlignment(new File("/home/mdecorde/TEMP/align/registry", "c2"), "c1");
87
		
88
		File wtcFile1 = new File("/home/mdecorde/TXM/corpora/SAMPLE/wtc/SAMPLE_en0.wtc");
89
		File wtcFile2 = new File("/home/mdecorde/TXM/corpora/SAMPLE/wtc/SAMPLE_fr3.wtc");
90
		File alignOutFile = new File("/home/mdecorde/TXM/corpora/SAMPLE/align.out");
91
		
92
		BuildAlignOut bao = new BuildAlignOut(wtcFile1, wtcFile2);
93
		System.out.println("Result: "+bao.process(alignOutFile, "seg", "id"));
94
	}
95
}
0 96

  
tmp/CQP/src/org/txm/importer/cwb/CwbMakeAll.java (revision 399)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2016-05-26 17:42:36 +0200 (Thu, 26 May 2016) $
25
// $LastChangedRevision: 3219 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.cwb;
29

  
30
import java.io.IOException;
31
import java.util.ArrayList;
32
import java.util.Arrays;
33

  
34
import org.txm.searchengine.cqp.clientExceptions.ServerNotFoundException;
35

  
36
// TODO: Auto-generated Javadoc
37
/**
38
 * This class aims at handling a Cwb Make All.
39
 * 
40
 * @author Adrien Yepdieu
41
 */
42
public class CwbMakeAll extends CwbProcess {
43

  
44
	/**
45
	 * Instantiates a new cwb make all.
46
	 */
47
	public CwbMakeAll() {
48
		super("cwb-makeall"); //$NON-NLS-1$
49
	}
50

  
51
	/**
52
	 * Instantiates a new cwb make all.
53
	 *
54
	 * @param name the name
55
	 */
56
	public CwbMakeAll(String name) {
57
		super(name);
58
	}
59

  
60
	/**
61
	 * * Instantiates a new Cwb Make All.
62
	 *
63
	 * @param pathToExecutable the path to the executable
64
	 * @param corpusName the corpus name
65
	 * @param pathToRegistry the path to the registry
66
	 * @return true, if successful
67
	 * @throws ServerNotFoundException the server not found exception
68
	 * @throws IOException 
69
	 * @throws InterruptedException 
70
	 */
71
	public boolean run(String pathToExecutable, String corpusName,
72
			String pathToRegistry) throws ServerNotFoundException, InterruptedException, IOException {
73
		return run(pathToExecutable, corpusName, pathToRegistry, false);
74
	}
75

  
76
	/**
77
	 * execute cwb-makeall.
78
	 *
79
	 * @param pathToExecutable path to cwb-makeall executable
80
	 * @param corpusName the corpus name you want to build
81
	 * @param pathToRegistry path to the directory which contains the corpus registry file
82
	 * @param monitorOutput show output messages or not
83
	 * @return true, if successful
84
	 * @throws ServerNotFoundException the server not found exception
85
	 * @throws IOException 
86
	 * @throws InterruptedException 
87
	 */
88
	public boolean run(String pathToExecutable, String corpusName,
89
			String pathToRegistry, boolean monitorOutput)
90
			throws ServerNotFoundException, InterruptedException, IOException {
91
		if (System.getProperty("os.name").contains("Win")) //$NON-NLS-1$ //$NON-NLS-2$
92
		{
93
			pathToExecutable = "\"" + pathToExecutable + "\""; //$NON-NLS-1$ //$NON-NLS-2$
94
			pathToRegistry = "\"" + pathToRegistry + "\""; //$NON-NLS-1$ //$NON-NLS-2$
95
		}
96
		ArrayList<String> cmd = new ArrayList<String>();
97
		cmd.addAll(Arrays.asList(pathToExecutable,
98
				"-r", pathToRegistry, "-V", corpusName.toUpperCase())); //$NON-NLS-1$ //$NON-NLS-2$
99
		if (debug) {
100
			cmd.add(1, "-D"); //$NON-NLS-1$ //$NON-NLS-2$
101
		}
102
		return run(cmd, monitorOutput, true); // always wait for cwb-makeall to end
103
	}
104

  
105
	/**
106
	 * The main method.
107
	 *
108
	 * @param args the arguments
109
	 */
110
	public static void main(String[] args) {
111
		CwbMakeAll cwbMa = new CwbMakeAll();
112
		try {
113
			// cwbMa.run("/home/ayepdieu/textometrie/trunk/toolbox/src/main/C/cwb-3.0/utils/cwb-makeall",
114
			// "QGRAALCM", "/home/txm/form/registry");
115
			cwbMa.run("/home/ayepdieu/textometrie/trunk/toolbox/src/main/C/cwb-3.0/utils/cwb-makeall", "QGRAALFRMOD", "/home/txm/form/registry"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
116
			// System.out.println(cwbMa.getErrorStream().toString());
117
		} catch (Exception ex) {
118
			System.out.println(ex);
119
			// System.out.println(cwbMa.getErrorStream());
120
		}
121
	}
122
}
0 123

  
tmp/CQP/src/org/txm/importer/cwb/PatchCwbRegistry.java (revision 399)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2016-05-24 10:43:03 +0200 (Tue, 24 May 2016) $
25
// $LastChangedRevision: 3216 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer.cwb;
29

  
30
import java.io.BufferedReader;
31
import java.io.File;
32
import java.io.FileInputStream;
33
import java.io.FileOutputStream;
34
import java.io.IOException;
35
import java.io.InputStreamReader;
36
import java.io.OutputStreamWriter;
37

  
38
import org.txm.core.messages.TXMCoreMessages;
39
import org.txm.utils.logger.Log;
40

  
41
// TODO: Auto-generated Javadoc
42
/**
43
 * The Class PatchCwbRegistry.
44
 */
45
public class PatchCwbRegistry {
46
	
47
	/**
48
	 * Patch.
49
	 *
50
	 * @param registryfile the file to patch
51
	 * @param datadir the directory containing the cwb index
52
	 * @throws IOException Signals that an I/O exception has occurred.
53
	 */
54
	public static void patch(File registryfile, File datadir)
55
			throws IOException {
56
		//String corpus = registryfile.getName();
57
		
58
		// HOME INFO
59
		File temp = File.createTempFile("temp", ".registry", registryfile.getParentFile()); //$NON-NLS-1$ //$NON-NLS-2$
60
		
61
		String encoding = "UTF-8"; //$NON-NLS-1$
62
		//if (OSDetector.isFamilyWindows()) encoding = "ISO-8859-1"; //$NON-NLS-1$
63
		BufferedReader reader = new BufferedReader(new InputStreamReader(
64
				new FileInputStream(registryfile), encoding));
65
		OutputStreamWriter writer = new OutputStreamWriter(
66
				new FileOutputStream(temp), encoding);
67

  
68
		String line = reader.readLine();
69
		while (line != null) {
70
			//System.out.println("LINE: "+line);
71
			if (line.startsWith("HOME")) //$NON-NLS-1$
72
				writer.write("HOME \"" + datadir + "\"\n"); //$NON-NLS-1$ //$NON-NLS-2$
73
			else if (line.startsWith("INFO")) //$NON-NLS-1$
74
				writer.write("INFO \"" + datadir + "/.info\"\n"); //$NON-NLS-1$ //$NON-NLS-2$
75
			else
76
				writer.write(line + "\n"); //$NON-NLS-1$
77
			line = reader.readLine();
78
		}
79
		writer.close();
80
		reader.close();
81
		
82
		registryfile.delete();
83
		
84
//		System.out.println("TMP file: "+temp);
85
//		System.out.println("TMP file exists?: "+temp.exists());
86
//		System.out.println("REG file exists?: "+registryfile.exists());
87
		if (registryfile.exists()) {
88
			System.out.println("ERROR: old registry path still exists. Result in "+temp);
89
		} else {
90
			temp.renameTo(registryfile);
91
		}
92
		
93
//		System.out.println("TMP file exists?: "+temp.exists());
94
//		System.out.println("REG file exists?: "+registryfile.exists());
95
	}
96

  
97
	/**
98
	 * Change the encoding value.
99
	 *
100
	 * @param registryfile the registry file to patch
101
	 * @param corpusEncoding the new encoding value
102
	 * @throws IOException Signals that an I/O exception has occurred.
103
	 */
104
	public static void patchEncoding(File registryfile, String corpusEncoding)
105
			throws IOException {
106
		String corpus = registryfile.getName();
107

  
108
		// HOME INFO
109
		File temp = File.createTempFile("temp", ".regsitry", registryfile.getParentFile()); //$NON-NLS-1$ //$NON-NLS-2$
110

  
111
		String encoding = "UTF-8"; //$NON-NLS-1$
112
		//if (OSDetector.isFamilyWindows()) encoding = "ISO-8859-1"; //$NON-NLS-1$
113
		BufferedReader reader = new BufferedReader(new InputStreamReader(
114
				new FileInputStream(registryfile), encoding));
115
		OutputStreamWriter writer = new OutputStreamWriter(
116
				new FileOutputStream(temp), encoding);
117

  
118
		if (corpusEncoding.startsWith("ISO") || corpusEncoding.startsWith("iso")) { //$NON-NLS-1$ //$NON-NLS-2$
119
			corpusEncoding = "latin" + corpusEncoding.substring(9);//remove iso-8859- //$NON-NLS-1$
120
		}
121
		corpusEncoding = corpusEncoding.toLowerCase();
122

  
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff