42 |
42 |
import org.txm.core.messages.TXMCoreMessages;
|
43 |
43 |
import org.txm.core.preferences.TXMPreferences;
|
44 |
44 |
import org.txm.core.results.Parameter;
|
45 |
|
import org.txm.core.results.TXMParameters;
|
46 |
45 |
import org.txm.core.results.TXMResult;
|
47 |
46 |
import org.txm.index.core.functions.PartitionIndex;
|
48 |
47 |
import org.txm.lexicaltable.core.functions.LexicalTable;
|
... | ... | |
61 |
60 |
import org.txm.statsengine.r.core.RWorkspace;
|
62 |
61 |
import org.txm.statsengine.r.core.exceptions.RWorkspaceException;
|
63 |
62 |
import org.txm.utils.TXMProgressMonitor;
|
|
63 |
import org.txm.utils.io.IOUtils;
|
64 |
64 |
import org.txm.utils.logger.Log;
|
65 |
65 |
|
66 |
66 |
/**
|
... | ... | |
74 |
74 |
*
|
75 |
75 |
*/
|
76 |
76 |
public class Specificities extends TXMResult implements RResult {
|
77 |
|
|
|
77 |
|
78 |
78 |
public static int MAXSPECIF = 1000;
|
79 |
|
|
|
79 |
|
80 |
80 |
/** The indices. */
|
81 |
81 |
private double[][] indices;
|
82 |
|
|
|
82 |
|
83 |
83 |
/** The rowindex. */
|
84 |
84 |
private int[] rowindex = null;
|
85 |
|
|
|
85 |
|
86 |
86 |
/** The colindex. */
|
87 |
87 |
private int[] colindex = null;
|
88 |
|
|
|
88 |
|
89 |
89 |
/** The frequencies. */
|
90 |
90 |
private int[][] frequencies = null;
|
91 |
|
|
|
91 |
|
92 |
92 |
/** The colnames. */
|
93 |
93 |
private List<String> colnames = null;
|
94 |
|
|
|
94 |
|
95 |
95 |
/** The rownames. */
|
96 |
96 |
private List<String> rownames = null;
|
97 |
|
|
|
97 |
|
98 |
98 |
/** The symbol. */
|
99 |
99 |
private String symbol;
|
100 |
|
|
|
100 |
|
101 |
101 |
/** The writer. */
|
102 |
102 |
@Deprecated
|
103 |
103 |
private BufferedWriter writer;
|
104 |
|
|
|
104 |
|
105 |
105 |
/**
|
106 |
106 |
* The table. contains **all** the corpus data necessary to compute the
|
107 |
107 |
* Specificities
|
108 |
108 |
*/
|
109 |
109 |
private LexicalTable lexicalTable;
|
110 |
|
|
|
110 |
|
111 |
111 |
/**
|
112 |
112 |
* Maximum score.
|
113 |
113 |
*/
|
114 |
114 |
@Parameter(key = SpecificitiesPreferences.MAX_SCORE)
|
115 |
115 |
protected int pMaxScore;
|
116 |
|
|
|
116 |
|
117 |
117 |
/**
|
118 |
118 |
* Minimum score.
|
119 |
119 |
*/
|
... | ... | |
121 |
121 |
protected int pMinScore;
|
122 |
122 |
|
123 |
123 |
/**
|
|
124 |
* Contrast script: "specif", "relative", an R expression containing SYMBOL, or the path of an existing .R file.
|
|
125 |
*/
|
|
126 |
@Parameter(key = SpecificitiesPreferences.CONTRAST_SCRIPT)
|
|
127 |
protected String pContrastScript;
|
|
128 |
|
|
129 |
/**
|
124 |
130 |
* Creates a not computed Specificities.
|
125 |
131 |
*
|
126 |
132 |
* @param parent
|
... | ... | |
128 |
134 |
public Specificities(LexicalTable parent) {
	// the lexical table is only handed over to the super constructor; nothing is computed here
	super(parent);
}
|
131 |
|
|
|
137 |
|
132 |
138 |
/**
|
133 |
139 |
* Creates a not computed Specificities.
|
134 |
140 |
*
|
... | ... | |
137 |
143 |
public Specificities(String parametersNodePath) {
	// delegates to the two-arguments constructor, without any parent lexical table
	this(parametersNodePath, (LexicalTable) null);
}
|
140 |
|
|
|
146 |
|
141 |
147 |
/**
|
142 |
148 |
* Creates a not computed Specificities.
|
143 |
149 |
*
|
... | ... | |
146 |
152 |
public Specificities(String parametersNodePath, LexicalTable parent) {
	// both arguments are only handed over to the super constructor; nothing is computed here
	super(parametersNodePath, parent);
}
|
149 |
|
|
|
155 |
|
150 |
156 |
@Override
|
151 |
157 |
public boolean loadParameters() {
|
152 |
158 |
this.lexicalTable = (LexicalTable) this.parent;
|
153 |
159 |
return true;
|
154 |
160 |
}
|
155 |
|
|
|
161 |
|
156 |
162 |
@Override
|
157 |
163 |
public boolean saveParameters() {
|
158 |
164 |
// nothing to do
|
159 |
165 |
return true;
|
160 |
166 |
}
|
161 |
|
|
|
167 |
|
162 |
168 |
@Override
|
163 |
169 |
protected boolean _compute(TXMProgressMonitor monitor) throws Exception {
|
164 |
|
|
|
170 |
|
165 |
171 |
this.frequencies = null;
|
166 |
|
|
|
172 |
|
167 |
173 |
// delete the specificities selection chart children since they can not be valid anymore if the unit property has changed
|
168 |
174 |
// FIXME: doesn't work
|
169 |
|
// if (this.lexicalTable.hasParameterChanged(TBXPreferences.UNIT_PROPERTY)) {
|
170 |
|
// this.deleteChildren(SpecificitiesSelection.class);
|
171 |
|
// }
|
172 |
|
|
|
175 |
// if (this.lexicalTable.hasParameterChanged(TBXPreferences.UNIT_PROPERTY)) {
|
|
176 |
// this.deleteChildren(SpecificitiesSelection.class);
|
|
177 |
// }
|
|
178 |
|
173 |
179 |
ILexicalTable data = this.lexicalTable.getData();
|
174 |
|
SpecificitiesR rSpecificities = new SpecificitiesR(data);
|
175 |
|
double[][] specIndex = rSpecificities.getScores();
|
176 |
|
|
177 |
|
init(rSpecificities.getSymbol(), specIndex);
|
178 |
|
|
|
180 |
double[][] specIndex = null;
|
|
181 |
if ("specif".equals(pContrastScript)) {
|
|
182 |
|
|
183 |
SpecificitiesR rSpecificities = new SpecificitiesR(data);
|
|
184 |
specIndex = rSpecificities.getScores();
|
|
185 |
init(rSpecificities.getSymbol(), specIndex);
|
|
186 |
} else if ("relative".equals(pContrastScript)) {
|
|
187 |
|
|
188 |
String script = "apply("+this.lexicalTable.getRSymbol()+", 2, function(x) x/sum(x)*100)";
|
|
189 |
specIndex = RWorkspace.getRWorkspaceInstance().eval(script).asDoubleMatrix();
|
|
190 |
init(lexicalTable.getRSymbol()+"Rel", specIndex);
|
|
191 |
} else if (pContrastScript.contains("SYMBOL")) {
|
|
192 |
|
|
193 |
String script = pContrastScript.replace("SYMBOL", this.lexicalTable.getRSymbol());
|
|
194 |
specIndex = RWorkspace.getRWorkspaceInstance().eval(script).asDoubleMatrix();
|
|
195 |
init(lexicalTable.getRSymbol()+"Rel", specIndex);
|
|
196 |
} else if (pContrastScript.toUpperCase().endsWith(".R") && new File(pContrastScript).exists()) {
|
|
197 |
|
|
198 |
String script = IOUtils.getText(new File(pContrastScript)).replace("SYMBOL", this.lexicalTable.getRSymbol());
|
|
199 |
specIndex = RWorkspace.getRWorkspaceInstance().eval(script).asDoubleMatrix();
|
|
200 |
init(lexicalTable.getRSymbol()+"Rel", specIndex);
|
|
201 |
}
|
|
202 |
|
179 |
203 |
return true;
|
180 |
204 |
}
|
181 |
|
|
|
205 |
|
182 |
206 |
/**
|
183 |
207 |
* Instantiates a new specificities result.
|
184 |
208 |
*
|
... | ... | |
187 |
211 |
* @throws Exception the stat exception
|
188 |
212 |
*/
|
189 |
213 |
protected void init(String symbol, double[][] specIndex) throws Exception {
|
190 |
|
|
|
214 |
|
191 |
215 |
if (this.symbol != null && this.symbol.equals(symbol)) {
|
192 |
216 |
try {
|
193 |
217 |
RWorkspace.getRWorkspaceInstance().removeVariableFromWorkspace(this.symbol);
|
... | ... | |
197 |
221 |
}
|
198 |
222 |
}
|
199 |
223 |
this.symbol = symbol;
|
200 |
|
|
|
224 |
|
201 |
225 |
if (this.lexicalTable == null) {
|
202 |
226 |
throw new IllegalArgumentException(SpecificitiesCoreMessages.theLexicalTableCannotBeNull);
|
203 |
227 |
}
|
204 |
228 |
// this.name = name;
|
205 |
|
|
|
229 |
|
206 |
230 |
if (specIndex == null || specIndex.length == 0) {
|
207 |
231 |
throw new IllegalArgumentException(SpecificitiesCoreMessages.noSpecificitiesIndexArray);
|
208 |
232 |
}
|
... | ... | |
211 |
235 |
throw new IllegalArgumentException(
|
212 |
236 |
SpecificitiesCoreMessages.theSpecificityIndexArrayDoesNotProperlyRepresentAMatrix);
|
213 |
237 |
}
|
214 |
|
|
|
238 |
|
215 |
239 |
this.indices = specIndex;
|
216 |
240 |
this.colnames = Arrays.asList(lexicalTable.getColNames().asStringsArray());
|
217 |
241 |
this.rownames = Arrays.asList(lexicalTable.getRowNames().asStringsArray());
|
218 |
|
|
|
242 |
|
219 |
243 |
// filter by max&min
|
220 |
244 |
int MAX = MAXSPECIF;
|
221 |
245 |
int MIN = -MAXSPECIF;
|
222 |
|
|
|
246 |
|
223 |
247 |
if (pMaxScore > 0) {
|
224 |
248 |
MAX = pMaxScore;
|
225 |
249 |
MIN = -pMaxScore;
|
226 |
250 |
}
|
227 |
|
|
|
251 |
|
228 |
252 |
// fixing max and min score values
|
229 |
253 |
for (int i = 0; i < indices.length; i++) {
|
230 |
254 |
for (int j = 0; j < indices[i].length; j++) {
|
... | ... | |
236 |
260 |
}
|
237 |
261 |
}
|
238 |
262 |
}
|
239 |
|
|
|
263 |
|
240 |
264 |
if (this.rownames != null && this.rownames.size() != 0) {
|
241 |
265 |
if (this.rownames.size() != specIndex.length) {
|
242 |
266 |
throw new IllegalArgumentException(
|
243 |
267 |
SpecificitiesCoreMessages.bind(SpecificitiesCoreMessages.numberOfRowsWantedP0AndFoundP1Mismatch,
|
244 |
268 |
this.rownames.size(), specIndex.length));
|
245 |
269 |
}
|
246 |
|
|
|
270 |
|
247 |
271 |
rowindex = ArrayIndex.getIndex(this.lexicalTable.getRowNames().asStringsArray(),
|
248 |
272 |
this.rownames.toArray(new String[] {}));
|
249 |
273 |
for (int i : rowindex) {
|
... | ... | |
269 |
293 |
// }
|
270 |
294 |
// }
|
271 |
295 |
}
|
272 |
|
|
|
296 |
|
273 |
297 |
/**
|
274 |
298 |
* Utility method to compute the specificity index of a word in a subcorpus.
|
275 |
299 |
* <br>
|
... | ... | |
297 |
321 |
}
|
298 |
322 |
return 0.0d;
|
299 |
323 |
}
|
300 |
|
|
|
324 |
|
301 |
325 |
/**
|
302 |
326 |
* Sets the unit property.
|
303 |
327 |
*
|
... | ... | |
306 |
330 |
public void setUnitProperty(WordProperty unitProperty) {
|
307 |
331 |
this.getLexicalTable().setUnitProperty(unitProperty);
|
308 |
332 |
}
|
309 |
|
|
|
333 |
|
310 |
334 |
/**
|
311 |
335 |
* Gets the type focus.
|
312 |
336 |
*
|
... | ... | |
315 |
339 |
public List<String> getTypeFocus() {
|
316 |
340 |
return rownames;
|
317 |
341 |
}
|
318 |
|
|
|
342 |
|
319 |
343 |
/**
|
320 |
344 |
* Gets the part focus.
|
321 |
345 |
*
|
... | ... | |
324 |
348 |
public List<String> getPartFocus() {
|
325 |
349 |
return colnames;
|
326 |
350 |
}
|
327 |
|
|
|
351 |
|
328 |
352 |
/**
|
329 |
353 |
* Gets the number of columns of the lexical table.
|
330 |
354 |
*
|
... | ... | |
346 |
370 |
return getColumnsNames().length;
|
347 |
371 |
}
|
348 |
372 |
}
|
349 |
|
|
|
373 |
|
350 |
374 |
/**
|
351 |
375 |
* Gets the sum of all the columns frequencies.
|
352 |
376 |
*
|
... | ... | |
361 |
385 |
return 0;
|
362 |
386 |
}
|
363 |
387 |
}
|
364 |
|
|
|
388 |
|
365 |
389 |
/**
|
366 |
390 |
* Gets the sums of each columns frequencies.
|
367 |
391 |
*
|
... | ... | |
370 |
394 |
* @throws CqiClientException
|
371 |
395 |
*/
|
372 |
396 |
public int[] getColumnsFrequenciesSums() throws StatException, CqiClientException {
|
373 |
|
|
|
397 |
|
374 |
398 |
if (
|
375 |
|
// this.lexicalTable != null &&
|
376 |
|
this.lexicalTable.hasBeenComputedOnce()) {
|
|
399 |
// this.lexicalTable != null &&
|
|
400 |
this.lexicalTable.hasBeenComputedOnce()) {
|
377 |
401 |
Vector colsSizes = lexicalTable.getColMarginsVector();
|
378 |
402 |
if (colindex != null) {
|
379 |
403 |
colsSizes = colsSizes.get(colindex);
|
... | ... | |
411 |
435 |
// }
|
412 |
436 |
// }
|
413 |
437 |
}
|
414 |
|
|
|
438 |
|
415 |
439 |
@Override
|
416 |
440 |
public LexicalTable getParent() {
|
417 |
441 |
return (LexicalTable) this.parent;
|
418 |
442 |
}
|
419 |
|
|
|
443 |
|
420 |
444 |
/**
|
421 |
445 |
* Gets the specificities index.
|
422 |
446 |
*
|
... | ... | |
425 |
449 |
public double[][] getSpecificitesIndices() {
|
426 |
450 |
return indices;
|
427 |
451 |
}
|
428 |
|
|
|
452 |
|
429 |
453 |
/**
|
430 |
454 |
* Name of the type for which specificities are computed.
|
431 |
455 |
*
|
... | ... | |
435 |
459 |
public String[] getRowNames() throws Exception {
|
436 |
460 |
return lexicalTable.getRowNames().asStringsArray();
|
437 |
461 |
}
|
438 |
|
|
|
462 |
|
439 |
463 |
/**
|
440 |
464 |
* Gets the names of the lexical table columns.
|
441 |
465 |
*
|
... | ... | |
445 |
469 |
public String[] getColumnsNames() throws StatException {
|
446 |
470 |
return lexicalTable.getColNames().asStringsArray();
|
447 |
471 |
}
|
448 |
|
|
|
472 |
|
449 |
473 |
/**
|
450 |
474 |
* Gets the frequencies of all columns.
|
451 |
475 |
*
|
... | ... | |
459 |
483 |
// if (lexicalTable != null) {
|
460 |
484 |
// System.out.println("FROM TABLE");
|
461 |
485 |
frequencies = RWorkspace.getRWorkspaceInstance().evalToInt2D(lexicalTable.getData().getSymbol());
|
462 |
|
|
|
486 |
|
463 |
487 |
// }
|
464 |
488 |
// else {// if table == null : subcorpus specif
|
465 |
489 |
// //System.out.println("FROM LEXICON");
|
... | ... | |
487 |
511 |
}
|
488 |
512 |
return frequencies;
|
489 |
513 |
}
|
490 |
|
|
|
514 |
|
491 |
515 |
/**
|
492 |
516 |
* Gets the lexical table.
|
493 |
517 |
*
|
... | ... | |
496 |
520 |
public LexicalTable getLexicalTable() {
|
497 |
521 |
return lexicalTable;
|
498 |
522 |
}
|
499 |
|
|
|
523 |
|
500 |
524 |
/**
|
501 |
525 |
* The frequency in the whole corpus.
|
502 |
526 |
*
|
... | ... | |
511 |
535 |
// }
|
512 |
536 |
return formFrequencies.asIntArray();
|
513 |
537 |
}
|
514 |
|
|
|
538 |
|
515 |
539 |
@Override
|
516 |
540 |
public boolean _toTxt(File outfile, String encoding, String colseparator, String txtseparator) throws Exception {
|
517 |
|
|
|
541 |
|
518 |
542 |
// NK: Declared as class attribute to perform a clean if the operation is
|
519 |
543 |
// interrupted
|
520 |
544 |
// OutputStreamWriter writer;
|
... | ... | |
529 |
553 |
org.txm.utils.logger.Log.printStackTrace(e1);
|
530 |
554 |
return false;
|
531 |
555 |
}
|
532 |
|
|
|
556 |
|
533 |
557 |
// int[] T = getPartSize();
|
534 |
558 |
acquireSemaphore();
|
535 |
559 |
try {
|
... | ... | |
568 |
592 |
finally {
|
569 |
593 |
releaseSemaphore();
|
570 |
594 |
}
|
571 |
|
|
|
595 |
|
572 |
596 |
return true;
|
573 |
597 |
}
|
574 |
|
|
|
598 |
|
575 |
599 |
/**
|
576 |
600 |
* release the R semaphore
|
577 |
601 |
*/
|
... | ... | |
579 |
603 |
super.resetComputingState();
|
580 |
604 |
releaseSemaphore();
|
581 |
605 |
}
|
582 |
|
|
|
606 |
|
583 |
607 |
@Override
|
584 |
608 |
public void clean() {
|
585 |
609 |
try {
|
... | ... | |
591 |
615 |
// TODO Auto-generated catch block
|
592 |
616 |
e.printStackTrace();
|
593 |
617 |
}
|
594 |
|
|
|
618 |
|
595 |
619 |
try {
|
596 |
620 |
if (this.writer != null) {
|
597 |
621 |
this.writer.flush();
|
... | ... | |
602 |
626 |
org.txm.utils.logger.Log.printStackTrace(e);
|
603 |
627 |
}
|
604 |
628 |
}
|
605 |
|
|
|
629 |
|
606 |
630 |
@Override
|
607 |
631 |
public String getRSymbol() {
|
608 |
632 |
return this.symbol;
|
609 |
633 |
}
|
610 |
|
|
|
634 |
|
611 |
635 |
// /**
|
612 |
636 |
// * Gets the sorted part indexes.
|
613 |
637 |
// *
|
... | ... | |
643 |
667 |
// }
|
644 |
668 |
// return new int[0];
|
645 |
669 |
// }
|
646 |
|
|
|
670 |
|
647 |
671 |
@Override
|
648 |
672 |
public String getName() {
|
649 |
673 |
// FIXME: SJ: to define
|
650 |
674 |
return this.getParent().getParent().getName() + TXMPreferences.PARENT_NAME_SEPARATOR + this.getSimpleName();
|
651 |
675 |
}
|
652 |
|
|
|
676 |
|
653 |
677 |
@Override
|
654 |
678 |
public String getSimpleName() {
|
655 |
679 |
// FIXME: SJ: to define
|
... | ... | |
676 |
700 |
return this.getEmptyName();
|
677 |
701 |
}
|
678 |
702 |
}
|
679 |
|
|
|
703 |
|
680 |
704 |
@Override
|
681 |
705 |
public String getDetails() {
|
682 |
706 |
return NLS.bind(this.getName() + " (max score={0})", this.pMaxScore); //$NON-NLS-1$
|
683 |
707 |
}
|
684 |
|
|
|
708 |
|
685 |
709 |
@Override
|
686 |
710 |
public String getComputingStartMessage() {
|
687 |
711 |
// from lexical table
|
... | ... | |
705 |
729 |
}
|
706 |
730 |
}
|
707 |
731 |
}
|
708 |
|
|
|
732 |
|
709 |
733 |
@Override
|
710 |
734 |
public boolean canCompute() {
|
711 |
|
|
|
735 |
|
712 |
736 |
if (this.lexicalTable == null) {
|
713 |
737 |
Log.severe("Specificities.canCompute(): can not compute without a lexical table.");
|
714 |
738 |
return false;
|
715 |
739 |
}
|
716 |
|
|
|
740 |
|
717 |
741 |
// if (this.lexicalTable.getNColumns() < 2) {
|
718 |
742 |
// Log.severe(SpecificitiesCoreMessages.ComputeError_NEED_AT_LEAST_2_PARTS);
|
719 |
743 |
// return false;
|
720 |
744 |
// }
|
721 |
|
|
|
745 |
|
722 |
746 |
return this.lexicalTable.getProperty() != null;
|
723 |
747 |
}
|
724 |
|
|
|
748 |
|
725 |
749 |
/**
|
726 |
750 |
* @param maxScoreFilter the pMaxScore to set
|
727 |
751 |
*/
|
728 |
752 |
public void setMaxScoreFilter(int maxScoreFilter) {
|
729 |
753 |
this.pMaxScore = maxScoreFilter;
|
730 |
754 |
}
|
731 |
|
|
|
755 |
|
732 |
756 |
/**
|
733 |
757 |
* @return the maxScore
|
734 |
758 |
*/
|
735 |
759 |
public int getMaxScore() {
|
736 |
760 |
return pMaxScore;
|
737 |
761 |
}
|
738 |
|
|
|
762 |
|
739 |
763 |
/**
|
740 |
764 |
* @return the unitProperty
|
741 |
765 |
*/
|
742 |
766 |
public Property getUnitProperty() {
|
743 |
767 |
return this.getLexicalTable().getProperty();
|
744 |
768 |
}
|
745 |
|
|
|
769 |
|
746 |
770 |
@Override
|
747 |
771 |
public String getResultType() {
|
748 |
772 |
return SpecificitiesCoreMessages.RESULT_TYPE;
|