Revision 319

tmp/org.txm.referencer.core/src/org/txm/functions/referencer/package.html (revision 319)
1
<html>
2
<body>
3
<p>Compute an index such as those in books</p>
4
</body>
5
</html>
0 6

  
tmp/org.txm.referencer.core/src/org/txm/functions/referencer/Referencer.java (revision 319)
1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21

  
22
//
23
// This file is part of the TXM platform.
24
//
25
// The TXM platform is free software: you can redistribute it and/or modify
26
// it under the terms of the GNU General Public License as published by
27
// the Free Software Foundation, either version 3 of the License, or
28
// (at your option) any later version.
29
//
30
// The TXM platform is distributed in the hope that it will be useful,
31
// but WITHOUT ANY WARRANTY; without even the implied warranty of
32
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33
// GNU General Public License for more details.
34
//
35
// You should have received a copy of the GNU General Public License
36
// along with the TXM platform.  If not, see <http://www.gnu.org/licenses/>.
37
// 
38
// 
39
// 
40
// $LastChangedDate: 2016-11-29 16:47:07 +0100 (Tue, 29 Nov 2016) $
41
// $LastChangedRevision: 3349 $
42
// $LastChangedBy: mdecorde $ 
43
//
44
package org.txm.functions.referencer;
45

  
46
import java.io.BufferedWriter;
47
import java.io.File;
48
import java.io.FileOutputStream;
49
import java.io.IOException;
50
import java.io.OutputStreamWriter;
51
import java.io.Writer;
52
import java.util.ArrayList;
53
import java.util.Collections;
54
import java.util.Comparator;
55
import java.util.HashMap;
56
import java.util.List;
57
import java.util.Map;
58

  
59
import org.txm.core.messages.TXMCoreMessages;
60
import org.txm.functions.Function;
61
import org.txm.searchengine.cqp.AbstractCqiClient;
62
import org.txm.searchengine.cqp.clientExceptions.CqiClientException;
63
import org.txm.searchengine.cqp.corpus.Corpus;
64
import org.txm.searchengine.cqp.corpus.CorpusManager;
65
import org.txm.searchengine.cqp.corpus.Property;
66
import org.txm.searchengine.cqp.corpus.QueryResult;
67
import org.txm.searchengine.cqp.corpus.query.Match;
68
import org.txm.searchengine.cqp.corpus.query.Query;
69
import org.txm.searchengine.cqp.serverException.CqiServerError;
70
import org.txm.statsengine.r.core.RWorkspace;
71
import org.txm.statsengine.r.core.exceptions.RWorkspaceException;
72
import org.txm.utils.logger.Log;
73

  
74
// TODO: Auto-generated Javadoc
75
/**
76
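 * Computes an index such as those found in books: for each value of a property
 * matched by a query, the list of references where that value occurs.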
77
 */
78
public class Referencer extends Function  {
79

  
80
	/** The query. */
81
	Query query;
82

  
83
	/** The pattern. */
84
	List<Property> pattern;
85

  
86
	/** The prop. */
87
	Property prop;
88

  
89
	/** The nlines. */
90
	int nlines;
91

  
92
	/** The lines. */
93
	List<Line> lines;
94

  
95
	/** The cache. */
96
	//private Map<Property, CqpDataProxy> cache;
97

  
98
	/** The hierarchic sort. */
99
	private boolean hierarchicSort;
100
	
101
	/** The writer. */
102
	private BufferedWriter writer;
103

  
104

  
105
	/**
	 * Creates a referencer. Nothing is computed here; the constructor only
	 * stores its arguments.
	 *
	 * @param corpus the corpus, passed to the parent constructor
	 * @param query the query whose matches are indexed
	 * @param prop the property whose values are the keys of the index
	 * @param pattern the properties whose values build the references
	 * @param hierarchicSort true to sort references by decreasing count,
	 *            false to sort them alphabetically
	 */
	public Referencer(Corpus corpus, Query query, Property prop, List<Property> pattern, boolean hierarchicSort) {
		super(corpus);
		this.hierarchicSort = hierarchicSort;
		this.pattern = pattern;
		this.prop = prop;
		this.query = query;
	}
122

  
123
	/** The result. */
124
	QueryResult result;
125

  
126
	/** The matches. */
127
	List<Match> matches;
128

  
129
	/** The indexes. */
130
	int[] indexes;
131

  
132
	/** The positions. */
133
	int[] positions;
134

  
135
	/** The processed idx. */
136
	HashMap<Integer, Line> processedIdx;
137

  
138
	/** The count idx. */
139
	private HashMap<Integer, Integer> countIdx;
140

  
141
	/** The symbol. */
142
	private String symbol;
143

  
144
	/**
145
	 * Call in order all stages to compute the referencer.
146
	 *
147
	 * @return true, if successful
148
	 * @throws CqiClientException the cqi client exception
149
	 * @throws IOException Signals that an I/O exception has occurred.
150
	 * @throws CqiServerError the cqi server error
151
	 */
152
	public boolean compute() throws CqiClientException, IOException, CqiServerError
153
	{
154
		if (this.parent == null || query == null || pattern == null || prop == null)
155
			return false;
156
		if (pattern.size() == 0)
157
			return false;
158
		if (query.getQueryString().equals("")) //$NON-NLS-1$
159
			return false;
160

  
161
		if (!getQueryMatches())
162
			return false;
163

  
164
		if (!getQueryindexes())
165
			return false;
166

  
167
		if (!groupPositionsbyId())
168
			return false;
169

  
170
		return true;
171
	}
172

  
173
	/**
174
	 * 1st step.
175
	 *
176
	 * @return the query matches
177
	 * @throws CqiClientException 
178
	 */
179
	public boolean getQueryMatches() throws CqiClientException{
180

  
181
		result = this.getCorpus().query(query, TXMCoreMessages.Referencer_1, false);
182
		if (result.getNMatch() == 0)
183
			return false;
184
		matches = result.getMatches();
185
		result.drop();
186
		return true;
187
	}
188

  
189
	/**
190
	 * 2nd step.
191
	 *
192
	 * @return the queryindexes
193
	 */
194
	public boolean getQueryindexes(){
195
		try {
196
			positions = new int[matches.size()];
197
			int i = 0;
198
			for (Match m : matches) {
199
				positions[i++] = m.getStart();
200
			}
201

  
202
			indexes = CorpusManager.getCorpusManager().getCqiClient().cpos2Id(prop.getQualifiedName(),positions);
203
		} catch (Exception e) {
204
			// TODO Auto-generated catch block
205
			org.txm.utils.logger.Log.printStackTrace(e);
206
			return false;
207
		}
208
		return true;
209
	}
210

  
211
	/**
212
	 * 3rd stage.
213
	 *
214
	 * @return true, if successful
215
	 */
216
	public boolean groupPositionsbyId() {
217
		processedIdx = new HashMap<Integer, Line>();
218

  
219
		lines = new ArrayList<Line>();
220
		//System.out.println("indexes ");
221
		for (int i = 0 ; i < indexes.length ; i++) {
222
			int id = indexes[i];
223
			//System.out.print(" "+id);
224
			if (!processedIdx.containsKey(id)) {	
225
				//System.out.println("add idx "+id);
226
				Line l = new Line(id);
227
				processedIdx.put(id, l);
228

  
229
				lines.add(l);
230
			}
231
			processedIdx.get(id).addPosition(positions[i]);
232
		}
233

  
234
		nlines = processedIdx.keySet().size();
235
		return true;
236
	}
237

  
238
	/**
239
	 * Gets the lines.
240
	 *
241
	 * @return the lines
242
	 * @throws CqiClientException the cqi client exception
243
	 * @throws IOException Signals that an I/O exception has occurred.
244
	 * @throws CqiServerError the cqi server error
245
	 */
246
	public List<Line> getLines() throws CqiClientException, IOException, CqiServerError
247
	{
248
		return getLines(0,lines.size());
249
	}
250

  
251
	/**
252
	 * Gets the lines.
253
	 *
254
	 * @param from the from
255
	 * @param to the to
256
	 * @return the lines
257
	 * @throws CqiClientException the cqi client exception
258
	 * @throws IOException Signals that an I/O exception has occurred.
259
	 * @throws CqiServerError the cqi server error
260
	 */
261
	public List<Line> getLines(int from, int to) throws CqiClientException, IOException, CqiServerError
262
	{
263
		if (from < 0)
264
			from = 0;
265
		if (to > lines.size())
266
			to = lines.size();
267

  
268
		HashMap<Integer, Line> lineswithoutpropvalue = new HashMap<Integer, Line>();
269

  
270
		AbstractCqiClient cqiClient = CorpusManager.getCorpusManager().getCqiClient();
271
		//build Missing lines
272
		for(int i = from ; i < to ; i++)// for each position group
273
		{
274
			Line l = lines.get(i);
275
			if (l.notcomputed) {
276
				lineswithoutpropvalue.put(l.id, l);
277

  
278
				// get all reference values of all position of the line
279
				Map<Property, List<List<String>>> refValues = new HashMap<Property, List<List<String>>>();
280
				for (Property property : pattern) {
281
					refValues.put(property, cqiClient.getData(property,
282
							l.positions, Collections.nCopies(l.positions.size(), 1)));
283
				}
284

  
285
				for (int j = 0 ; j < l.positions.size() ; j++)// build the Reference object
286
				{
287
					String propvalue = ""; //$NON-NLS-1$
288
					for (Property property : pattern) {
289
						propvalue += refValues.get(property).get(j).get(0)+", "; //$NON-NLS-1$
290
					}
291

  
292
					l.addReference(propvalue.substring(0, propvalue.length()-2));
293
				}
294
				if (hierarchicSort)
295
					l.sortRef(true);
296
				else
297
					l.sortAlpha();
298
				l.notcomputed = false;
299
			}
300
		}
301

  
302
		// get idx String value
303
		int[] idxwithoutvalues = new int[lineswithoutpropvalue.keySet().size()];
304
		int count = 0;
305
		for(int i : lineswithoutpropvalue.keySet()) {
306
			idxwithoutvalues[count++] = i;
307
		}
308

  
309
		String[] values = CorpusManager.getCorpusManager().getCqiClient().id2Str(prop.getQualifiedName(), idxwithoutvalues);
310
		for(int i = 0 ; i < values.length ; i++) {
311
			int id = idxwithoutvalues[i];
312
			lineswithoutpropvalue.get(id).linepropvalue = values[i];
313
		}
314
		return lines.subList(from, to);
315
	}
316

  
317
	/**
318
	 * Gets the corpus.
319
	 *
320
	 * @return the corpus
321
	 */
322
	public Corpus getCorpus() {
323
		return (Corpus) this.parent;
324
	}
325

  
326
	/**
327
	 * Gets the query.
328
	 *
329
	 * @return the query
330
	 */
331
	public Query getQuery() {
332
		return query;
333
	}
334

  
335
	/**
336
	 * Gets the property.
337
	 *
338
	 * @return the property
339
	 */
340
	public Property getProperty() {
341
		return prop;
342
	}
343

  
344
	/**
345
	 * Gets the pattern.
346
	 *
347
	 * @return the pattern
348
	 */
349
	public List<Property> getPattern() {
350
		return pattern;
351
	}
352

  
353
	/**
354
	 * Sets the pattern.
355
	 *
356
	 * @param pattern the new pattern
357
	 */
358
	public void setPattern(List<Property> pattern) {
359
		this.pattern = pattern;
360

  
361
	}
362

  
363
	/**
364
	 * Gets the n lines.
365
	 *
366
	 * @return the n lines
367
	 */
368
	public int getNLines() {
369
		return nlines;
370
	}
371

  
372
	/**
373
	 * Sets the property.
374
	 *
375
	 * @param property the new property
376
	 */
377
	public void setProperty(Property property) {
378
		this.prop = property;
379
	}
380

  
381
	/**
382
	 * Clear all lines.
383
	 */
384
	public void clearAllLines()
385
	{
386
		for(Line line : lines)// we need to recompute lines' references
387
		{
388
			line.clear();
389
		}
390
	}
391

  
392
	/**
393
	 * The Class Line.
394
	 */
395
	public class Line
396
	{
397

  
398
		/** The id. */
399
		int id;
400

  
401
		/** The notcomputed. */
402
		boolean notcomputed=true;
403

  
404
		/** The propvalue. */
405
		String linepropvalue;
406

  
407
		/** The references. */
408
		ArrayList<String> references;
409

  
410
		/** The positions. */
411
		ArrayList<Integer> positions;
412

  
413
		/** The references counts. */
414
		HashMap<String, Integer> referencesCounts = new HashMap<String, Integer>();
415

  
416
		/**
417
		 * Instantiates a new line.
418
		 *
419
		 * @param i the i
420
		 */
421
		public Line(int i) {
422
			id = i;
423
			positions = new ArrayList<Integer>();
424
			references = new ArrayList<String>();
425
		}
426

  
427
		/**
428
		 * Sort ref.
429
		 *
430
		 * @param reverse the reverse
431
		 */
432
		public void sortRef(final boolean reverse) {
433
			Collections.sort(references, new Comparator<String>() {
434

  
435
				@Override
436
				public int compare(String arg0, String arg1) {
437
					if (reverse)
438
						return getCount(arg1) - getCount(arg0);
439
					else
440
						return getCount(arg0) - getCount(arg1);
441
				}
442
			});
443
		}
444

  
445
		/**
446
		 * Sort alpha.
447
		 */
448
		public void sortAlpha() {
449
			Collections.sort(references);
450
		}
451

  
452
		/**
453
		 * Clear.
454
		 */
455
		public void clear() {
456
			notcomputed = false;
457
			references = new ArrayList<String>();
458
		}
459

  
460
		/* (non-Javadoc)
461
		 * @see java.lang.Object#toString()
462
		 */
463
		@Override
464
		public String toString()
465
		{
466
			return linepropvalue+" : <"+id+","+prop+"> : "+references.toString(); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
467
		}
468

  
469
		/**
470
		 * Adds the reference.
471
		 *
472
		 * @param string the string
473
		 */
474
		public void addReference(String string) {
475
			if (!referencesCounts.containsKey(string))
476
			{
477
				references.add(string);
478
				referencesCounts.put(string, 1);
479
			}
480
			else
481
			{
482
				referencesCounts.put(string, referencesCounts.get(string)+1);
483
			}
484
		}
485

  
486
		/**
487
		 * Gets the references.
488
		 *
489
		 * @return the references
490
		 */
491
		public List<String> getReferences()
492
		{
493
			return references;
494
		}
495

  
496
		/**
497
		 * Adds the position.
498
		 *
499
		 * @param i the i
500
		 */
501
		public void addPosition(int i) {
502
			positions.add(i);
503
		}
504

  
505
		/**
506
		 * Gets the referencer.
507
		 *
508
		 * @return the referencer
509
		 */
510
		public Referencer getReferencer()
511
		{
512
			return Referencer.this;
513
		}
514

  
515
		/**
516
		 * Gets the prop value.
517
		 *
518
		 * @return the prop value
519
		 */
520
		public String getPropValue() {
521
			return linepropvalue;
522
		}
523

  
524
		/**
525
		 * Gets the count.
526
		 *
527
		 * @param ref the ref
528
		 * @return the count
529
		 */
530
		public int getCount(String ref)
531
		{
532
			return referencesCounts.get(ref);
533
		}
534

  
535
		public HashMap<String, Integer> getCounts()
536
		{
537
			return referencesCounts;
538
		}
539
	}
540

  
541
	/**
542
	 * Write all the lines in a file.
543
	 *
544
	 * @param outfile the outfile
545
	 * @param encoding the encoding
546
	 * @return true, if successful
547
	 */
548
	public boolean toTxt(File outfile, String encoding) {
549

  
550
		return toTxt(outfile, encoding, "\t", ""); //$NON-NLS-1$ //$NON-NLS-2$
551
	}
552

  
553
	/**
554
	 * Write all the lines in a file.
555
	 *
556
	 * @param outfile the outfile
557
	 * @param encoding the encoding
558
	 * @param colseparator the colseparator
559
	 * @param txtseparator the txtseparator
560
	 * @return true, if successful
561
	 */
562
	public boolean toTxt(File outfile, String encoding, String colseparator, String txtseparator) {
563
		try {
564
			// NK: writer declared as class attribute to perform a clean if the operation is interrupted
565
			this.writer = new BufferedWriter(new OutputStreamWriter(
566
					new FileOutputStream(outfile), encoding)); 
567
			// if ("UTF-8".equals(encoding)) writer.write('\ufeff'); // UTF-8 BOM
568
			toTxt(writer, 0, lines.size() - 1, colseparator, txtseparator);
569
		} catch (Exception e) {
570
			System.err.println(TXMCoreMessages.Referencer_2 + Log.toString(e));
571
			return false;
572
		}
573
		return true;
574
	}
575

  
576
	/**
577
	 * Write the lines between from and to in a writer.
578
	 *
579
	 * @param writer Where to write the lines
580
	 * @param from The first line to be written
581
	 * @param to The last line to be writen
582
	 * @param colseparator the colseparator
583
	 * @param txtseparator the txtseparator
584
	 * @throws CqiClientException the cqi client exception
585
	 * @throws IOException Signals that an I/O exception has occurred.
586
	 */
587
	public void toTxt(Writer writer, int from, int to, String colseparator, String txtseparator)
588
			throws CqiClientException, IOException {
589
		try {
590
			getLines(0, 9999999);
591
			writer.write(prop.getName()+colseparator+this.pattern+"\n"); //$NON-NLS-1$
592
			for (Line line : lines) {
593
				writer.write(txtseparator+line.getPropValue().replace(txtseparator, txtseparator+txtseparator)+txtseparator+ colseparator + txtseparator+ line.getReferences().toString().replace(txtseparator, txtseparator+txtseparator)+txtseparator+"\n"); //$NON-NLS-1$
594
				writer.flush();
595
			}
596

  
597
			writer.close();
598
		} catch (CqiServerError e) {
599
			// TODO Auto-generated catch block
600
			org.txm.utils.logger.Log.printStackTrace(e);
601
		}
602
	}
603

  
604

  
605

  
606
	/**
607
	 * Gets the name.
608
	 *
609
	 * @return the name
610
	 */
611
	public String getName() {
612
		return ""+query; //$NON-NLS-1$
613
	}
614

  
615

  
616
	/** The noref. */
617
	protected static int noref = 1;
618

  
619
	/** The prefix r. */
620
	protected static String prefixR = "Referencer"; //$NON-NLS-1$
621

  
622
	/**
623
	 * As r matrix.
624
	 *
625
	 * @return the string
626
	 * @throws RWorkspaceException the r workspace exception
627
	 */
628
	public String asRMatrix() throws RWorkspaceException {
629

  
630
		String keywords[] = new String[lines.size()];
631
		for(int i = 0 ; i < lines.size() ; i++)
632
			keywords[i] = lines.get(i).getPropValue();
633
		//System.out.println(Arrays.toString(keywords));
634

  
635
		String symbol = prefixR+noref;
636

  
637
		RWorkspace rw = RWorkspace.getRWorkspaceInstance();
638
		rw.eval(symbol+" <- list()"); //$NON-NLS-1$
639
		rw.addVectorToWorkspace("refkeywords", keywords); //$NON-NLS-1$
640
		rw.eval(symbol+"$keywords <- refkeywords"); //$NON-NLS-1$
641
		//rw.eval(symbol+"$query <- \""+this.query.getQueryString()+"\")");
642
		rw.eval(symbol+"$refs <- list()"); //$NON-NLS-1$
643
		int i = 1;
644
		for(Line line : this.lines)
645
		{
646
			String refs[] = line.getReferences().toArray(new String[line.getReferences().size()]);
647
			//System.out.println(line.getPropValue()+": "+Arrays.toString(refs));
648
			rw.addVectorToWorkspace("reflinerefs", refs); //$NON-NLS-1$
649
			rw.eval(symbol+"$refs[["+i+"]] <- reflinerefs"); //$NON-NLS-1$ //$NON-NLS-2$
650
			i++;
651
		}
652

  
653
		noref++;
654
		this.symbol = symbol;
655
		return symbol;
656
	}
657

  
658
	/**
659
	 * Gets the symbol.
660
	 *
661
	 * @return the symbol
662
	 */
663
	public String getSymbol() {
664
		return symbol;
665
	}
666

  
667
	@Override
668
	public void clean() {
669
		try {
670
			this.writer.flush();
671
			this.writer.close();	
672
		} catch (IOException e) {
673
			// TODO Auto-generated catch block
674
			org.txm.utils.logger.Log.printStackTrace(e);
675
		}
676
	}
677

  
678

  
679
	public int getV() {
680
		if (matches == null) return 0;
681
		return matches.size();
682
	}
683

  
684
	@Override
685
	public String getSimpleName() {
686
		// TODO Auto-generated method stub
687
		return null;
688
	}
689

  
690
	@Override
691
	public String getDetails() {
692
		// TODO Auto-generated method stub
693
		return null;
694
	}
695
}
0 696

  
tmp/org.txm.referencer.core/build.properties (revision 319)
1
source.. = src/
2
output.. = bin/
3
bin.includes = META-INF/,\
4
               .
0 5

  
tmp/org.txm.referencer.core/.settings/org.eclipse.jdt.core.prefs (revision 319)
1
eclipse.preferences.version=1
2
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
4
org.eclipse.jdt.core.compiler.compliance=1.6
5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
6
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
7
org.eclipse.jdt.core.compiler.source=1.6
0 8

  
tmp/org.txm.referencer.core/.classpath (revision 319)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<classpath>
3
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
4
	<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
5
	<classpathentry kind="src" path="src"/>
6
	<classpathentry kind="output" path="bin"/>
7
</classpath>
0 8

  
tmp/org.txm.referencer.core/META-INF/MANIFEST.MF (revision 319)
1
Manifest-Version: 1.0
2
Bundle-ManifestVersion: 2
3
Bundle-Name: Referencer
4
Bundle-SymbolicName: org.txm.referencer.core
5
Bundle-Version: 1.0.0.qualifier
6
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
7
Require-Bundle: org.txm.statsengine.r.core;bundle-version="1.0.0",
8
 org.txm.core;bundle-version="0.7.0"
9
Export-Package: org.txm.functions.referencer
0 10

  
tmp/org.txm.referencer.core/.project (revision 319)
1
<?xml version="1.0" encoding="UTF-8"?>
2
<projectDescription>
3
	<name>org.txm.referencer.core</name>
4
	<comment></comment>
5
	<projects>
6
	</projects>
7
	<buildSpec>
8
		<buildCommand>
9
			<name>org.eclipse.jdt.core.javabuilder</name>
10
			<arguments>
11
			</arguments>
12
		</buildCommand>
13
		<buildCommand>
14
			<name>org.eclipse.pde.ManifestBuilder</name>
15
			<arguments>
16
			</arguments>
17
		</buildCommand>
18
		<buildCommand>
19
			<name>org.eclipse.pde.SchemaBuilder</name>
20
			<arguments>
21
			</arguments>
22
		</buildCommand>
23
	</buildSpec>
24
	<natures>
25
		<nature>org.eclipse.pde.PluginNature</nature>
26
		<nature>org.eclipse.jdt.core.javanature</nature>
27
	</natures>
28
</projectDescription>
0 29

  

Also available in: Unified diff