Révision 3965

TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/stats/TopVocabularyMacro.groovy (revision 3965)
1
package org.txm.macro.stats
2
// Copyright © 2010-2016 ENS de Lyon.
3
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
4
// Lyon 2, University of Franche-Comté, University of Nice
5
// Sophia Antipolis, University of Paris 3.
6
// @author mdecorde
7
// @author sheiden
8
//
9
// The TXM platform is free software: you can redistribute it
10
// and/or modify it under the terms of the GNU General Public
11
// License as published by the Free Software Foundation,
12
// either version 2 of the License, or (at your option) any
13
// later version.
14
//
15
// The TXM platform is distributed in the hope that it will be
16
// useful, but WITHOUT ANY WARRANTY; without even the implied
17
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
18
// PURPOSE. See the GNU General Public License for more
19
// details.
20
//
21
// You should have received a copy of the GNU General
22
// Public License along with the TXM platform. If not, see
23
// http://www.gnu.org/licenses.
24

  
25
// This macro writes, for each column of a specificities table,
26
// the nTop units with the highest specificity index and the nTop
27
// units with the lowest one. The result is saved in a TSV file.
28

  
29
import org.txm.Toolbox
30
import org.txm.searchengine.cqp.clientExceptions.*
31
import org.txm.searchengine.cqp.corpus.*
32
import org.txm.searchengine.cqp.corpus.query.*
33
import org.txm.statsengine.r.core.RWorkspace
34
import org.kohsuke.args4j.*
35
import groovy.transform.Field
36
import org.txm.rcp.swt.widget.parameters.*
37
import org.txm.specificities.core.functions.Specificities
38

  
39
// The macro reads its data from a Specificities result: stop right away
// when the current Corpus view selection is anything else.
def selectionIsSpecificities = corpusViewSelection instanceof Specificities
if (!selectionIsSpecificities) {
	println "** You need to select a Specificities result icon in the Corpus view before calling this macro. Aborting."
	return
}

// PARAMETERS

// Path of the TSV file to write.
@Field @Option(name="outputFile", usage="TSV result file", widget="String", required=true, def="top.tsv")
def outputFile

// Number of rows written at each end of the sorted table, per column.
@Field @Option(name="nTop", usage="Number of words per column", widget="Integer", required=false, def="10")
def nTop

// END OF PARAMETERS

// Open the parameters dialog; stop when it does not report success.
def dialogAccepted = ParametersDialog.open(this)
if (!dialogAccepted) {
	return
}
54

  
55

  
56
// Read the specificities table from the selected result.
Specificities specif = corpusViewSelection
def indices = specif.getSpecificitesIndices()
def freqs = specif.getFrequencies()
def rownames = specif.getRowNames()
def colnames = specif.getColumnsNames()

// Number of rows requested at each end of the sorted table.
// A missing or negative value is treated as 0 (nothing is written).
int requested = Math.max(0, (nTop as Integer) ?: 0)

def output = new File(outputFile)

// withWriter closes the file even when an exception interrupts the export,
// so the handle is never leaked and buffered output is flushed.
output.withWriter("UTF-8") { writer ->
	for (int j = 0; j < colnames.size(); j++) {

		// Section header for column j.
		writer.println "## "+colnames[j]
		writer.println "unit\tF\tindice"

		// One [unit, frequency, index] row per table row, for column j.
		def sorted = []
		for (int i = 0; i < rownames.size(); i++) {
			sorted << [rownames[i], freqs[i][j], indices[i][j]]
		}

		// Highest index first.
		sorted.sort() { -it[2] }

		// Never ask for more rows than the table has: indexing a list past
		// its end yields null, which made join() fail on short tables.
		int total = sorted.size()
		int limit = Math.min(requested, total)

		// Rows with the highest indices.
		for (int i = 0; i < limit; i++) {
			writer.println sorted[i].join("\t")
		}

		// Rows with the lowest indices, kept in sorted order. Starting at
		// 'limit' at the earliest avoids repeating rows already written
		// above when the table has fewer than 2 * nTop rows.
		int tailStart = Math.max(limit, total - limit)
		for (int i = tailStart; i < total; i++) {
			writer.println sorted[i].join("\t")
		}

		writer.println "\n\n"
	}
}

println "Result: "+output.getAbsolutePath()
90

  

Formats disponibles : Unified diff