Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / SchemasListMacro.groovy @ 2144

History | View | Annotate | Download (4.6 kB)

1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5

    
6
// STANDARD DECLARATIONS
7
package org.txm.macro.urs.exploit
8

    
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import visuAnalec.elements.*
14
import org.txm.searchengine.cqp.corpus.*
15
import org.txm.macro.urs.AnalecUtils
16
import org.txm.Toolbox
17
import org.txm.rcp.commands.*
18
import org.apache.commons.lang.StringUtils
19
import org.txm.searchengine.cqp.CQPSearchEngine
20

    
// Build the list of corpora to process from the Corpus view selection:
// a corpus is kept as is, a partition is replaced by its parts.
// Any other kind of selected object is ignored.
def selection = []
for (def selected : corpusViewSelections) {
        if (selected instanceof CQPCorpus) {
                selection.add(selected)
        } else if (selected instanceof Partition) {
                selection.addAll(selected.getParts())
        }
}

// Nothing usable selected: tell the user and stop the macro.
if (selection.isEmpty()) {
        println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
        return false
}

// Call compute(false) on every retained corpus before it is used below.
for (def selectedCorpus : selection) {
        selectedCorpus.compute(false)
}
33

    
// BEGINNING OF PARAMETERS

// URSQL expression (TYPE@PROP=VALUE) selecting the schemas to list
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

// size bounds passed to the schema selection
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size

// URSQL expression (TYPE@PROP=VALUE) filtering the units of each schema
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

// passed as is to the schema selection
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion

// word property used to display the units; when the corpus has no word property
// with this name, the value is used as a unit property name instead
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word        lemma        frlemma        frolemma        #forme#        id", required=false, def="word")
String word_property

// text printed between two units of a schema (not used in buildCQL mode)
@Field @Option(name="separator", usage="", widget="String", required=true, def=", ")
String separator

// when set, each unit is printed as a CQL token sequence built on the "id" word property
@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false')
def buildCQL

// debug label chosen in the dialog; replaced by a numeric level right after the dialog
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
def debug

// Ask the user for the parameter values; stop if the dialog does not return true.
if (!ParametersDialog.open(this)) return;

// Map the debug label to its numeric level; any other value is left untouched.
switch (debug) {
        case "OFF":
                debug = 0
                break
        case "ON":
                debug = 1
                break
        case "ALL":
                debug = 2
                break
        case "REALLY ALL":
                debug = 3
                break
}
60

    
61

    
// MAIN: for each selected corpus, print the schemas matching 'schema_ursql' and, for each
// schema, the units matching 'unit_ursql'.
// One line is printed per schema: "<schema properties>: <unit><delimiter><unit>..."
//  - buildCQL mode: a unit is printed as "([id="..."] [id="..."] ...)", one token per word
//    position of the unit, and units are delimited with "|";
//  - otherwise: a unit is printed with the 'word_property' values of its words joined with
//    a space (or with its own 'word_property' unit property when the corpus has no such
//    word property), and units are delimited with 'separator'.
for (def corpus : selection) {
        def analecCorpus = URSCorpora.getCorpus(corpus)

        // stop the macro as soon as one of the URSQL expressions is reported as erroneous
        def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
        if (errors.size() > 0) {
                println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
        if (errors.size() > 0) {
                println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        def CQI = CQPSearchEngine.getCqiClient()

        // Declared locally so the value does not leak into the script binding.
        // null means the corpus has no word property with that name.
        def word_prop = corpus.getProperty(buildCQL ? "id" : word_property)
        if (buildCQL && word_prop == null) {
                // without this guard the CQL construction fails on word_prop.getQualifiedName()
                println "** The $corpus corpus has no 'id' word property: the CQL cannot be built."
                return;
        }

        def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, strict_inclusion);
        schemas.sort() {it.getProps()}

        println "$corpus schemas (${schemas.size()}):"
        for (def schema : schemas) {

                def allUnites = schema.getUnitesSousjacentesNonTriees()
                def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
                units.sort()

                // printable form of each unit of the schema, in unit order
                def renderedUnits = []
                for (def unit : units) {

                        if (word_prop == null) { // word_property is the analec unit property to use
                                String forme = unit.getProp(word_property)
                                renderedUnits << forme
                                continue
                        }

                        // all word positions of the unit; a one-word unit yields a single position
                        int[] positions = (unit.getDeb()..unit.getFin()) as int[]
                        // one CQi call per unit instead of one call per word position
                        def values = CQI.cpos2Str(word_prop.getQualifiedName(), positions)

                        if (buildCQL) {
                                def tokens = values.collect { "["+word_prop+"=\""+it+"\"]" }
                                renderedUnits << "("+tokens.join(" ")+")"
                        } else {
                                String forme = StringUtils.join(values, " ")
                                renderedUnits << forme
                        }
                }

                println schema.getProps().toString()+ ": " + renderedUnits.join(buildCQL ? "|" : separator)
        }
}
137