Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / UnitsReferentialDensityMacro.groovy @ 2134

History | View | Annotate | Download (3.2 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

    
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import visuAnalec.elements.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.macro.urs.AnalecUtils
15

    
16
// Build the list of corpora to process from the current Corpus view selection:
// a CQPCorpus is taken as is, a Partition contributes all of its parts,
// anything else is ignored.
def selection = []
for (def item in corpusViewSelections) {
    if (item instanceof CQPCorpus) {
        selection.add(item)
    } else if (item instanceof Partition) {
        selection.addAll(item.getParts())
    }
}
21

    
22
// Nothing usable was selected: tell the user and stop the macro with a false result.
if (selection.isEmpty()) {
    println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
    return false
}

// Otherwise call compute(false) on every selected corpus before using it
// (presumably makes sure the corpus is ready to be queried - confirm against the corpus API).
selection.each { it.compute(false) }
28

    
29
// ---- Macro parameters ----
// Each @Field/@Option pair declares one script field; the @Option annotation carries
// its name, usage text, widget kind, required flag and default value.
// All of them except "debug" are forwarded unchanged to AnalecUtils.selectUnitsInSchema.

// Schema selector, usage "TYPE@PROP=VALUE"
@Field
@Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
String schema_ursql

// Lower bound on the schema size
@Field
@Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=false, def="3")
int minimum_schema_size

// Upper bound on the schema size
@Field
@Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=false, def="9999999")
int maximum_schema_size

// Unit selector, usage "TYPE@PROP=VALUE"
@Field
@Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

// Position of the unit inside its schema (0 disables the selection)
@Field
@Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=false, def="0")
int position_in_schema

// CQL query used to build structure limits; dynamically typed
@Field
@Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
def cql_limit

// Whether units must be strictly included into the corpus matches; dynamically typed
@Field
@Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true")
def strict_inclusion

// Position of the unit relative to the structure limit (0 disables the selection); dynamically typed
@Field
@Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=false, def="0")
def position_in_matches

// Debug label chosen by the user; dynamically typed because the script later
// replaces the label with a number.
// NOTE(review): the metaVar values are separated by runs of spaces here - confirm this is
// the delimiter the StringArray widget expects.
@Field
@Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
def debug
48

    
49
// Open the parameters dialog for this script; leave the macro when open() returns a false value.
if (!ParametersDialog.open(this)) {
    return
}

// Replace the debug label with its numeric level: OFF=0, ON=1, ALL=2, REALLY ALL=3.
// Any other value is left untouched.
if (debug == "OFF") {
    debug = 0
} else if (debug == "ON") {
    debug = 1
} else if (debug == "ALL") {
    debug = 2
} else if (debug == "REALLY ALL") {
    debug = 3
}
51

    
52
// For every selected corpus, print its referential density: the number of units returned by
// AnalecUtils.selectUnitsInSchema divided by the corpus size, both as a ratio and as a percentage.
for (def corpus : selection) {

    def analecCorpus = URSCorpora.getCorpus(corpus)

    int nMots = corpus.getSize()
    if (nMots == 0) {
        // Groovy integer division by zero throws ArithmeticException, which would abort
        // the macro for all remaining corpora; report the empty corpus and move on.
        println "WARNING: $corpus is empty (0 words): referential density cannot be computed"
        continue
    }

    def units = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
            unit_ursql, position_in_schema, cql_limit, strict_inclusion, position_in_matches)

    int nUnites = units.size()

    // Declared with def so the values stay local instead of leaking into the script binding.
    def coef = (nUnites / nMots)
    def rounded = String.format("%.2f", coef * 100.0d)
    println "$corpus referential density: nUnites/nMots = $nUnites/$nMots = $coef = ${rounded}%"

    // NOTE(review): the test is ">=" while the message says "greater than", so the warning
    // is also printed when both counts are equal - confirm which one is intended.
    if (nUnites >= nMots) {
        println "WARNING: possible encoding error in $corpus. Number of units ($nUnites) is greater than number of words ($nMots)"
    }
    // Returning the result is currently disabled:
    //return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
}