Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / macro / stats / BasicVocabularyMacro.groovy @ 187

History | View | Annotate | Download (3.6 kB)

1
package org.txm.macro.stats
2
// Copyright © 2010-2016 ENS de Lyon.
3
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
4
// Lyon 2, University of Franche-Comté, University of Nice
5
// Sophia Antipolis, University of Paris 3.
6
// @author mdecorde
7
// @author sheiden
8
//
9
// The TXM platform is free software: you can redistribute it
10
// and/or modify it under the terms of the GNU General Public
11
// License as published by the Free Software Foundation,
12
// either version 2 of the License, or (at your option) any
13
// later version.
14
//
15
// The TXM platform is distributed in the hope that it will be
16
// useful, but WITHOUT ANY WARRANTY; without even the implied
17
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
18
// PURPOSE. See the GNU General Public License for more
19
// details.
20
//
21
// You should have received a copy of the GNU General
22
// Public License along with the TXM platform. If not, see
23
// http://www.gnu.org/licenses.
24

    
25
// This macro computes the basic vocabulary of a partition
26
// given a specificities table. The result is saved in a
27
// TSV file.
28

    
29
import org.txm.Toolbox
30
import org.txm.functions.ca.*
31
import org.txm.searchengine.cqp.clientExceptions.*
32
import org.txm.searchengine.cqp.corpus.*
33
import org.txm.searchengine.cqp.corpus.query.*
34
import org.txm.stat.*
35
import org.txm.stat.data.*
36
import org.txm.stat.engine.r.*
37
import org.txm.functions.specificities.*
38
import org.kohsuke.args4j.*
39
import groovy.transform.Field
40
import org.txm.rcpapplication.swt.widget.parameters.*
41

    
42
// TODO: the selection type check below is temporarily commented out
43
//if (!(corpusViewSelection instanceof SpecificitesResult)) {
44
//        println "** You need to select a Specificities result icon in the Corpus view before calling this macro. Aborting."
45
//        return
46
//}
47

    
48
// PARAMETERS

// Directory in which the TSV result file is written.
@Field @Option(name="outputDirectory", usage="output directory", widget="Folder", required=true, def=".")
def outputDirectory

// Name of the TSV result file, created inside outputDirectory.
@Field @Option(name="outputFile", usage="TSV result file", widget="String", required=true, def="banal.tsv")
def outputFile

// A unit is kept only if the absolute value of each of its scores is strictly below this threshold.
// NOTE(review): assumes the parameters dialog delivers a numeric value here - TODO confirm.
@Field @Option(name="scoreMax", usage="score maximum threshold", widget="Float", required=false, def="1.5")
def scoreMax

// The option below is disabled, so suseSum is never set by the dialog and falls back to "n".
//@Field @Option(name="suseSum", usage="Use the row sums : y/n", widget="String", required=false, def="n")
def suseSum // not used

// END OF PARAMETERS
if (!ParametersDialog.open(this)) return

if (suseSum == null || suseSum.length() == 0) suseSum = "n"
boolean useSum = suseSum.toLowerCase().startsWith("y")

if (useSum) {
	println "Using line sums..."
}

// Read everything needed from the selection BEFORE creating the output file,
// so that a wrong selection does not leave an empty file behind.
def specif = corpusViewSelection
def indices = specif.getSpecificitesIndex()
def freqs = specif.getFrequency()
def rownames = specif.getTypeNames()
def colnames = specif.getPartShortNames()
int nCols = colnames.size()

def output = new File(outputDirectory.toString(), outputFile.toString())
def selected = []

// withWriter closes the writer even if an exception is thrown while writing.
output.withWriter("UTF-8") { writer ->

	// Header line: unit, total frequency, maximum score, then one (frequency, score) column pair per part.
	writer.print "unit"
	writer.print "\tF"
	writer.print "\tscore_max"
	for (int j = 0; j < nCols; j++) {
		writer.print "\t"+colnames[j]
		writer.print "\tscore"
	}
	writer.println ""

	for (int i = 0; i < rownames.length; i++) {
		boolean add = true
		def totF = 0
		def totscore = 0.0
		def maxS = 0

		for (int j = 0; j < nCols; j++) {
			def sp = Math.abs(indices[i][j])
			if (sp >= scoreMax) {
				// one score reaching the threshold is enough to reject the unit
				add = false
			} else if (sp >= maxS) {
				// maxS only tracks the scores that stay below the threshold
				maxS = sp
			}
			totF += freqs[i][j]
			totscore += sp
		}

		if (useSum) {
			// in "sum" mode the decision is taken on the sum of the absolute scores instead
			add = !(totscore >= scoreMax)
		}

		if (add) {
			selected << rownames[i]
			writer.print rownames[i]
			writer.print "\t$totF"
			writer.print "\t$maxS"
			for (int j = 0; j < nCols; j++) {
				writer.print "\t"+freqs[i][j]
				writer.print "\t"+indices[i][j]
			}
			writer.println ""
		}
	}
}

// Reported only once the file has been completely written and closed.
println "Selected: "+selected
println "Saved in $output."
133