Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / macro / stats / BasicVocabularyMacro.groovy @ 499

History | View | Annotate | Download (3.5 kB)

1
package org.txm.macro.stats
2
// Copyright © 2010-2016 ENS de Lyon.
3
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
4
// Lyon 2, University of Franche-Comté, University of Nice
5
// Sophia Antipolis, University of Paris 3.
6
// @author mdecorde
7
// @author sheiden
8
//
9
// The TXM platform is free software: you can redistribute it
10
// and/or modify it under the terms of the GNU General Public
11
// License as published by the Free Software Foundation,
12
// either version 2 of the License, or (at your option) any
13
// later version.
14
//
15
// The TXM platform is distributed in the hope that it will be
16
// useful, but WITHOUT ANY WARRANTY; without even the implied
17
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
18
// PURPOSE. See the GNU General Public License for more
19
// details.
20
//
21
// You should have received a copy of the GNU General
22
// Public License along with the TXM platform. If not, see
23
// http://www.gnu.org/licenses.
24

    
25
// This macro computes the basic vocabulary of a partition
26
// given a specificities table. The result is saved in a
27
// TSV file.
28

    
29
import org.txm.Toolbox
30
import org.txm.searchengine.cqp.clientExceptions.*
31
import org.txm.searchengine.cqp.corpus.*
32
import org.txm.searchengine.cqp.corpus.query.*
33
import org.txm.statsengine.r.core.RWorkspace
34
import org.kohsuke.args4j.*
35
import groovy.transform.Field
36
import org.txm.rcp.swt.widget.parameters.*
37
import org.txm.specificities.core.functions.SpecificitesResult
38

    
39
// The macro works on a Specificities result only: check the type of the
// current Corpus view selection and stop immediately when it is anything else.
def selectionIsSpecificities = corpusViewSelection instanceof SpecificitesResult
if (!selectionIsSpecificities) {
        println "** You need to select a Specificities result icon in the Corpus view before calling this macro. Aborting."
        return
}
43

    
44
// PARAMETERS
// Each @Field @Option declaration describes one macro parameter: its name,
// usage text, widget type, whether it is required and its default value.

// Directory in which the result file is created.
@Field @Option(name="outputDirectory", usage="output directory", widget="Folder", required=true, def=".")
def outputDirectory

// Name of the TSV result file created inside outputDirectory.
@Field @Option(name="outputFile", usage="TSV result file", widget="String", required=true, def="banal.tsv")
def outputFile

// Threshold compared with the absolute specificity scores when selecting units.
@Field @Option(name="scoreMax", usage="score maximum threshold", widget="Float", required=false, def="1.5")
def scoreMax

// The option declaration of suseSum is disabled (commented out); the variable
// is still declared because the code following the dialog reads it.
//@Field @Option(name="suseSum", usage="Use the row sums : y/n", widget="String", required=false, def="n")
def suseSum

// END OF PARAMETERS

// Open the parameters dialog and stop the macro when it does not report success
// (presumably when the user cancels it -- to be confirmed against ParametersDialog).
if (!ParametersDialog.open(this)) {
        return
}
60

    
61
// Read suseSum as a yes/no answer: a missing or empty value counts as "n",
// and only a value starting with "y" (in any case) enables the row sums mode.
suseSum = suseSum ?: "n"
boolean useSum = suseSum.toLowerCase().startsWith("y")

if (useSum) println "Using line sums..."
67

    
68
// Select the units of the specificities table and export them as a TSV table.
//
// Selection rule:
//  - default: a unit is kept when the absolute value of each of its scores
//    is below scoreMax;
//  - when useSum is set: a unit is kept when the sum of the absolute values
//    of its scores is below scoreMax.
//
// Columns written: unit, F (sum of the frequencies of the row), score_max
// (largest absolute score of the row), then for each part its frequency
// followed by its score.

// Build the path from parent and child instead of concatenating with "/"
def output = new File(outputDirectory.toString(), outputFile.toString())
def specif = corpusViewSelection
def indices = specif.getSpecificitesIndex()
def freqs = specif.getFrequency()
def rownames = specif.getTypeNames()
def colnames = specif.getPartShortNames()

// Names of the units written to the file, reported on the console at the end
def selected = []

// withWriter closes the file in all cases, including when an exception
// interrupts the export
output.withWriter("UTF-8") { writer ->

        // Header line
        writer.print "unit"
        writer.print "\tF"
        writer.print "\tscore_max"

        for (int j = 0; j < colnames.size(); j++) {
                writer.print "\t"+colnames[j]
                writer.print "\tscore"
        }

        writer.println ""

        // One candidate line per unit
        for (int i = 0; i < rownames.size(); i++) {
                boolean add = true
                def totF = 0
                def totscore = 0.0
                def maxS = 0

                for (int j = 0; j < colnames.size(); j++) {
                        def sp = Math.abs(indices[i][j])

                        // A single score reaching the threshold rejects the unit
                        if (sp >= scoreMax) add = false

                        // Track the maximum for every score, so that score_max is
                        // also right when the row sums mode re-admits the unit
                        if (sp >= maxS) maxS = sp

                        totF += freqs[i][j]
                        totscore += sp
                }

                // In row sums mode the decision depends on the summed scores only
                if (useSum) {
                        add = !(totscore >= scoreMax)
                }

                if (add) {
                        selected << rownames[i]
                        writer.print rownames[i]
                        writer.print "\t$totF"
                        writer.print "\t$maxS"
                        for (int j = 0; j < colnames.size(); j++) {
                                writer.print "\t"+freqs[i][j]
                                writer.print "\t"+indices[i][j]
                        }
                        writer.println ""
                }
        }
}

// Reported only once the file has been completely written and closed
println "Selected: "+selected
println "Saved in $output."
129