Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / SchemasIndexMacro.groovy @ 2167

History | View | Annotate | Download (5 kB)

1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5

    
6
// STANDARD DECLARATIONS
7
package org.txm.macro.urs.exploit
8

    
9
import groovy.transform.Field
10

    
11
import org.apache.commons.lang.StringUtils
12
import org.kohsuke.args4j.*
13
import org.txm.Toolbox
14
import org.txm.annotation.urs.*
15
import org.txm.lexicaltable.core.functions.LexicalTable
16
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl
17
import org.txm.macro.urs.*
18
import org.txm.rcp.commands.*
19
import org.txm.rcp.swt.widget.parameters.*
20
import org.txm.rcp.views.corpora.CorporaView
21
import org.txm.searchengine.cqp.corpus.*
22
import org.txm.searchengine.cqp.CQPSearchEngine
23
import org.txm.statsengine.r.core.RWorkspace
24

    
25
import visuAnalec.donnees.*
26
import visuAnalec.elements.*
27
import cern.colt.matrix.DoubleFactory2D
28
import cern.colt.matrix.DoubleMatrix2D
29

    
30
// Build the list of corpora to process from the Corpus view selection:
// a CQPCorpus is taken as is, a Partition contributes each of its parts.
// Anything else in the selection is ignored.
//
// Each corpus is kept only once (first occurrence wins). Further down, the
// results are stored in a map keyed by corpus (allresults[corpus]) while the
// header line and the matrix width are derived from this list; a corpus
// listed twice (e.g. a Partition and one of its parts both selected) would
// otherwise yield more header columns than data columns.
def selection = []
for (def s : corpusViewSelections) {
	def candidates = []
	if (s instanceof CQPCorpus) candidates << s
	else if (s instanceof Partition) candidates.addAll(s.getParts())

	for (def c : candidates) {
		if (!selection.contains(c)) selection << c
	}
}

if (selection.size() == 0) {
	// Nothing usable was selected: report what was received and stop the macro.
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
} else {
	// compute(false) is invoked on every retained corpus before it is queried.
	for (def c : selection) c.compute(false)
}
42

    
43
// BEGINNING OF PARAMETERS
// Each @Option below is a script field filled in through ParametersDialog.

// URSQL expression used to select the schemas
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// size bounds passed to the schema selection
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// schema property whose values are counted and displayed
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF")
String schema_property_display

// URSQL expression for units (optional)
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// threshold compared against each output line before it is printed
@Field @Option(name="output_fmin", usage="minimal frequency displayed", widget="Integer", required=false, def="0")
output_fmin

// verbosity, chosen by label in the dialog and converted to a number below
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
debug

// Stop here when the dialog does not return a true value.
if (!ParametersDialog.open(this)) return;

// Turn the debug label into a numeric level; any other value is left untouched.
switch (debug) {
	case "OFF":
		debug = 0
		break
	case "ON":
		debug = 1
		break
	case "ALL":
		debug = 2
		break
	case "REALLY ALL":
		debug = 3
		break
}
63

    
64
// For every selected corpus: validate the user expressions, select the
// schemas, then count them by value of the displayed property.
// allresults maps each corpus to its [property value: number of schemas] map.
def allresults = [:]
for (def corpus : selection) {
	def analecCorpus = URSCorpora.getCorpus(corpus)

	// The three checks below abort the whole macro on the first reported problem.
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
	if (errors.size() > 0) {
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_property_display)
	if (errors.size() > 0) {
		println "** $schema_property_display schema property not found in the corpus with types: $errors."
		return;
	}

	errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
	if (errors.size() > 0) {
		println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
		return;
	}

	// NOTE(review): CQI is not read anywhere in this loop; the call is kept as is.
	def CQI = CQPSearchEngine.getCqiClient()

	def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion);
	schemas.sort { it.getProps() }

	// Schemas without the property are grouped under the "<null>" label.
	def counts = schemas.countBy { schema ->
		def shown = schema.getProp(schema_property_display)
		shown == null ? "<null>" : shown
	}

	// Schemas whose property is the empty string are relabelled "<empty>".
	def emptyCount = counts.remove("")
	if (emptyCount != null) counts["<empty>"] = emptyCount

	if (debug) {
		println "\n*** Statistics: "
		counts.each { k, v ->
			println((k.size() > 0 ? "$k" : "N/A") + "\t" + v)
		}
	}

	allresults[corpus] = counts
}
105

    
106
if (debug) println "RESULTS=$allresults"

// Union of the property values seen in at least one corpus.
def keys = new HashSet();
allresults.values().each { keys.addAll(it.keySet()) }
if (keys.size() == 0) {
	println "No values for $schema_property_display property"
	return
}
keys = keys.sort()

// One matrix row per property value, one column per selected corpus.
// mFactory and matrix are assigned without 'def', i.e. as script binding variables.
mFactory = DoubleFactory2D.dense
matrix = mFactory.make(keys.size(), selection.size())

if (debug) println "KEYS=$keys"

// NOTE(review): title is not used in the visible part of the script, and it is
// built from unit_ursql rather than schema_ursql -- confirm the intent.
def title = "${selection}\n${unit_ursql}" + " ${schema_property_display} frequencies"

// Header of the tab-separated table.
println "Index de la propriété $schema_property_display des schemas $schema_ursql de ${selection}"
println "$schema_property_display\t"+selection.join("\t")

for (int i = 0; i < keys.size(); i++) {
	def prop_val = keys[i]
	String line = (prop_val.size() > 0) ? "" + prop_val : "N/A"

	// Highest frequency of this value over all corpora; it decides whether
	// the line is printed.
	int rowMax = 0
	int j = 0
	for (def c : allresults.keySet()) {
		// A value absent from a corpus counts as 0.
		def freq = allresults[c][prop_val] ?: 0

		matrix.set(i, j, freq)
		line += "\t" + freq
		if (freq > rowMax) rowMax = freq
		j++
	}

	if (rowMax >= output_fmin) {
		println "$line"
	}
}

// The sorted property values are stored in the props binding variable.
props = keys
148

    
149