Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / UnitsSummaryMacro.groovy @ 2167

History | View | Annotate | Download (4 kB)

1 671 mdecorde
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2 671 mdecorde
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3 671 mdecorde
// @author mdecorde
4 671 mdecorde
// @author sheiden
5 671 mdecorde
// STANDARD DECLARATIONS
6 1217 mdecorde
package org.txm.macro.urs.exploit
7 671 mdecorde
8 671 mdecorde
import org.apache.commons.lang.StringUtils
9 671 mdecorde
import org.kohsuke.args4j.*
10 671 mdecorde
11 671 mdecorde
import groovy.transform.Field
12 671 mdecorde
13 671 mdecorde
import org.txm.Toolbox
14 671 mdecorde
import org.txm.rcp.swt.widget.parameters.*
15 1217 mdecorde
import org.txm.annotation.urs.*
16 1217 mdecorde
import org.txm.macro.urs.AnalecUtils
17 671 mdecorde
import org.txm.searchengine.cqp.AbstractCqiClient
18 671 mdecorde
import org.txm.searchengine.cqp.corpus.*
19 1979 mdecorde
import org.txm.searchengine.cqp.CQPSearchEngine
20 671 mdecorde
21 671 mdecorde
import visuAnalec.donnees.Structure
22 671 mdecorde
import visuAnalec.elements.*
23 671 mdecorde
24 671 mdecorde
// Simple class name of this script, used to prefix console messages.
def scriptName = this.class.getSimpleName()

// Build the list of corpora to process from the Corpus view selection:
// a corpus is taken as is, a partition contributes each of its parts,
// any other kind of selected object is ignored.
def selection = []
for (def item : corpusViewSelections) {
    if (item instanceof CQPCorpus) {
        selection << item
    } else if (item instanceof Partition) {
        selection.addAll(item.getParts())
    }
}

// Nothing usable selected: report it and stop the macro.
if (selection.isEmpty()) {
    println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
    return false
}

// Otherwise call compute(false) on every retained corpus before using it.
for (def c : selection) {
    c.compute(false)
}
38 671 mdecorde
39 671 mdecorde
// BEGINNING OF PARAMETERS
40 671 mdecorde
// Schema selector, written as TYPE@PROP=VALUE (passed to AnalecUtils.selectUnitsInSchema).
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// Lower bound on the size of the schemas to consider.
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size

// Upper bound on the size of the schemas to consider.
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// Unit selector, written as TYPE@PROP=VALUE (optional).
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

// Position of the unit inside its schema; 0 disables this selection.
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int position_in_schema

// Optional CQL query used to build structure limits (left untyped, as in the original declaration).
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
cql_limit

// Whether units must be strictly included into the corpus matches.
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true")
boolean strict_inclusion

// Position of the unit relative to the structure limit; 0 disables this selection.
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int position_in_matches

// Verbosity label chosen in the dialog; untyped because it is replaced by a number just below.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
debug

// Open the parameters dialog; stop the macro when it does not return a true value.
if (!ParametersDialog.open(this)) return

// Turn the verbosity label into a numeric level (0..3); any other value is left untouched.
if (debug == "OFF") {
    debug = 0
} else if (debug == "ON") {
    debug = 1
} else if (debug == "ALL") {
    debug = 2
} else if (debug == "REALLY ALL") {
    debug = 3
}
60 671 mdecorde
61 1979 mdecorde
// CQi client of the CQP search engine.
// NOTE(review): CQI is not referenced anywhere else in this script; kept because
// the call may have side effects that are not visible here - confirm before removing.
def CQI = CQPSearchEngine.getCqiClient()

// corpus -> units selected in that corpus, kept in selection order
def allresults = new LinkedHashMap()
// union of the unit property names declared by the annotation structure of each corpus
def props = new HashSet()
for (def corpus : selection) {

    // NOTE(review): 'word' is never read in this loop; kept as in the original - confirm before removing.
    def word = corpus.getWordProperty()
    def analecCorpus = URSCorpora.getCorpus(corpus)

    // gather the property names of every unit type of this corpus
    for (def type : analecCorpus.getStructure().getUnites()) {
        props.addAll(analecCorpus.getStructure().getUniteProperties(type))
    }

    // select the units of this corpus according to the macro parameters
    def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
            unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches)

    // Fix: the message used to end with a stray '}' that was printed after the corpus name.
    if (debug) println "found ${selectedUnits.size()} units in $corpus"
    allresults[corpus] = selectedUnits
}
78 671 mdecorde
79 671 mdecorde
// Tabulate the summary as tab-separated text: one column per corpus,
// a first row with the number of selected units, then one row per unit property
// giving the number of distinct values of that property among the selected units.
def keys = allresults.keySet().sort()
println "Statistiques des unités de "+corpusViewSelections.join(",")
println "\t"+keys.join("\t")

// row of unit counts
print "Units"
keys.each { key ->
    print "\t"+allresults[key].size()
}
println ""

// one row per property
for (def propName : props) {
    print "$propName"
    for (def key : keys) {
        // distinct values taken by this property among the units of this corpus
        def distinct = new HashSet()
        for (def unit : allresults[key]) {
            distinct << unit.getProp(propName)
        }
        print "\t"+distinct.size()

        // in debug mode the sorted values themselves are appended, each one quoted
        def sortedValues = new ArrayList(distinct).sort()
        if (debug) print "\t\""+sortedValues.join("\", \"")+"\""
    }
    println ""
}