Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / UnitsSummaryMacro.groovy @ 1217

History | View | Annotate | Download (3.8 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

    
8
import org.apache.commons.lang.StringUtils
9
import org.kohsuke.args4j.*
10

    
11
import groovy.transform.Field
12

    
13
import org.txm.Toolbox
14
import org.txm.rcp.swt.widget.parameters.*
15
import org.txm.annotation.urs.*
16
import org.txm.macro.urs.AnalecUtils
17
import org.txm.searchengine.cqp.AbstractCqiClient
18
import org.txm.searchengine.cqp.corpus.*
19

    
20
import visuAnalec.donnees.Structure
21
import visuAnalec.elements.*
22

    
23
def scriptName = this.class.getSimpleName()

// Flatten the Corpus view selection into a list of corpora:
// a corpus is kept as is, a partition contributes its parts,
// any other kind of selected item is ignored.
def selection = []
corpusViewSelections.each { item ->
    if (item instanceof CQPCorpus) {
        selection << item
    } else if (item instanceof Partition) {
        selection.addAll(item.getParts())
    }
}

// Nothing usable was selected: report it and stop the macro.
if (selection.isEmpty()) {
    println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
    return false
}
35

    
36
// BEGINNING OF PARAMETERS
// Each @Field/@Option pair below declares one script-level parameter of the macro.

// Schema selector, written as TYPE@PROP=VALUE (default "CHAINE").
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Smallest schema size taken into account (default 3).
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

// Largest schema size taken into account (default 9999999).
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// Unit selector, written as TYPE@PROP=VALUE (optional, default "MENTION").
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

// Position of the unit inside its schema; 0 disables this selection.
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance_in_schema

// CQL query used to build the structure limits.
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
def limit_cql

// Whether units must be strictly included in the corpus matches.
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// Position of the unit relative to a structure limit; 0 disables this selection.
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance

// Debug verbosity label chosen among the metaVar values; converted to a number after the dialog.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
def debug
55
// Open the parameters dialog; stop the macro when it does not return a true value.
if (!ParametersDialog.open(this)) return

// Convert the debug label into a numeric level (OFF=0, ON=1, ALL=2, REALLY ALL=3).
// Any other value is left untouched.
if (debug == "OFF") {
    debug = 0
} else if (debug == "ON") {
    debug = 1
} else if (debug == "ALL") {
    debug = 2
} else if (debug == "REALLY ALL") {
    debug = 3
}
57

    
58

    
59
        // NOTE(review): CQI is not read anywhere else in the visible part of this script.
        // No explicit import names CQPSearchEngine; it may come from one of the wildcard
        // imports or from the script environment — TODO confirm it resolves at runtime.
        def CQI = CQPSearchEngine.getCqiClient()
60

    
61
// Selected units of each corpus, keyed by corpus and kept in selection order.
def allresults = new LinkedHashMap()
// Union of the unit property names declared by the annotation structures.
def props = new HashSet()

for (def corpus : selection) {
    // mainCorpus is deliberately left undeclared, as in the original: it is a script-level variable.
    mainCorpus = corpus.getMainCorpus()

    // Not used further in the visible code; the lookup is kept as is.
    def word = mainCorpus.getWordProperty()

    def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName())
    def structure = analecCorpus.getStructure()

    // Gather the property names of every unit type of this structure.
    structure.getUnites().each { type ->
        props.addAll(structure.getUniteProperties(type))
    }

    // Select the units of this corpus according to the macro parameters.
    allresults[corpus] = AnalecUtils.selectUnitsInSchema(
            debug, analecCorpus, corpus,
            schema_ursql, minimum_schema_size, maximum_schema_size,
            unit_ursql, limit_distance_in_schema,
            limit_cql, strict_inclusion, limit_distance)
}
77

    
78
// tabulate summary
// Prints a tab-separated table: one column per selected corpus, a first row with the
// number of selected units, then one row per unit property with its number of distinct values.
def keys = allresults.keySet().sort();
println "Statistiques des unités de "+corpusViewSelections.join(",")
println "\t"+keys.join("\t")

// "Units" row: number of selected units per corpus.
println((["Units"] + keys.collect { k -> allresults[k].size() }).join("\t"))

for (def p : props) {
    // Number of distinct values of property p, one entry per corpus.
    def counts = []
    for (def k : keys) {
        def propsvalues = new HashSet()
        for (def u : allresults[k]) {
            propsvalues.add(u.getProp(p))
        }
        // Fix: interpolate the corpus and the property (the original printed the literal
        // text "k @p"), and emit the trace before the row so that it no longer splits it.
        if (debug) println "$k @$p values="+propsvalues
        counts << propsvalues.size()
    }
    println(([p] + counts).join("\t"))
}
99