Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / exploit / SchemasListMacro.groovy @ 2134

History | View | Annotate | Download (4.4 kB)

1 481 mdecorde
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2 481 mdecorde
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3 481 mdecorde
// @author mdecorde
4 481 mdecorde
// @author sheiden
5 481 mdecorde
6 481 mdecorde
// STANDARD DECLARATIONS
7 2082 mdecorde
package org.txm.macro.urs.exploit
8 481 mdecorde
9 481 mdecorde
import org.kohsuke.args4j.*
10 481 mdecorde
import groovy.transform.Field
11 671 mdecorde
import org.txm.rcp.swt.widget.parameters.*
12 1217 mdecorde
import org.txm.annotation.urs.*
13 1968 mdecorde
import visuAnalec.elements.*
14 481 mdecorde
import org.txm.searchengine.cqp.corpus.*
15 1968 mdecorde
import org.txm.macro.urs.AnalecUtils
16 481 mdecorde
import org.txm.Toolbox
17 671 mdecorde
import org.txm.rcp.commands.*
18 481 mdecorde
import org.apache.commons.lang.StringUtils
19 1968 mdecorde
import org.txm.searchengine.cqp.CQPSearchEngine
20 481 mdecorde
21 2105 mdecorde
// Gather the corpora to process from the Corpus view selection:
// a CQPCorpus is taken as is, a Partition contributes all of its parts.
// Any other kind of selected object is ignored.
def selection = []
for (def selected : corpusViewSelections) {
        if (selected instanceof CQPCorpus) {
                selection.add(selected)
        } else if (selected instanceof Partition) {
                selection.addAll(selected.getParts())
        }
}

// Nothing usable selected: tell the user and stop the macro
if (selection.isEmpty()) {
        println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
        return false
}

// compute(false) is called on every selected corpus before it is queried below
for (def selectedCorpus : selection) {
        selectedCorpus.compute(false)
}
33 2105 mdecorde
34 481 mdecorde
// BEGINNING OF PARAMETERS
// Each parameter is a script field annotated with @Option. ParametersDialog.open(this) is
// called right after these declarations and the macro stops if it returns false.
// NOTE(review): presumably the dialog fills the fields from the @Option metadata - confirm.
35 1962 mdecorde
// schema_ursql: URSQL expression (TYPE@PROP=VALUE) checked against Schema.class and passed to
// AnalecUtils.selectSchemasInCorpus to select the schemas to list.
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
36 1962 mdecorde
String schema_ursql
37 481 mdecorde
38 1962 mdecorde
// minimum_schema_size: lower size bound passed to AnalecUtils.selectSchemasInCorpus.
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
39 481 mdecorde
int minimum_schema_size
40 2099 mdecorde
// maximum_schema_size: upper size bound passed to AnalecUtils.selectSchemasInCorpus.
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
41 2099 mdecorde
int maximum_schema_size
42 1962 mdecorde
// unit_ursql: URSQL expression (TYPE@PROP=VALUE) checked against Unite.class and passed to
// AnalecUtils.filterElements to keep only the matching units of each schema.
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
43 1962 mdecorde
String unit_ursql
44 481 mdecorde
45 481 mdecorde
// word_property: name looked up with corpus.getProperty(); when that lookup returns null the
// name is used as a unit property instead (unit.getProp). Not used when buildCQL is true,
// where the "id" property is used.
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word        lemma        frlemma        frolemma        #forme#        id", required=false, def="word")
46 481 mdecorde
String word_property
47 481 mdecorde
48 481 mdecorde
// separator: text printed between two units of the same schema when buildCQL is false.
@Field @Option(name="separator", usage="", widget="String", required=true, def=", ")
49 481 mdecorde
String separator
50 481 mdecorde
51 481 mdecorde
// buildCQL: when true, each unit is printed as a parenthesised sequence of [id="..."] tokens
// and units are separated by "|" instead of being printed as word forms.
@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false')
52 481 mdecorde
def buildCQL
53 481 mdecorde
54 1968 mdecorde
// debug: chosen as a label here, then converted to a level from 0 to 3 once the dialog has
// been opened, and passed to the AnalecUtils calls.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
55 1968 mdecorde
debug
56 481 mdecorde
57 481 mdecorde
// Open the parameters dialog; stop the macro when ParametersDialog.open returns false
if (!ParametersDialog.open(this)) {
        return;
}

// Replace the debug label by its numeric level; any other value is left as it is
switch (debug) {
        case "OFF":
                debug = 0
                break
        case "ON":
                debug = 1
                break
        case "ALL":
                debug = 2
                break
        case "REALLY ALL":
                debug = 3
                break
}
59 481 mdecorde
60 1968 mdecorde
61 2105 mdecorde
// For each selected corpus: check both URSQL expressions, select the schemas matching
// schema_ursql within the size bounds, then print one line per schema made of the schema
// properties followed by its units matching unit_ursql.
for (def corpus : selection) {
        def analecCorpus = URSCorpora.getCorpus(corpus)

        // A non-empty result from isPropertyDefined is reported and stops the whole macro
        def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
        if (errors.size() > 0) {
                println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
        if (errors.size() > 0) {
                println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
                return;
        }

        def CQI = CQPSearchEngine.getCqiClient()

        // Declared locally so it no longer leaks into the script binding.
        // CQL output is built on the "id" property; otherwise the user-chosen property is used
        // (null when word_property is not a property of the corpus).
        def word_prop = buildCQL ? corpus.getProperty("id") : corpus.getProperty(word_property)

        def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
        schemas.sort() {it.getProps()}

        println "$corpus schemas (${schemas.size()}):"
        for (def schema : schemas) {

                def allUnites = schema.getUnitesSousjacentesNonTriees()
                def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
                units.sort()

                print schema.getProps().toString()+ ": "
                def first = true
                for (def unit : units) {

                        if (buildCQL) {
                                // Positions from getDeb() to getFin() inclusive; a range a..a already
                                // holds the single position a, so no special case is needed
                                int[] pos = (unit.getDeb()..unit.getFin())
                                // One CQI call for the whole unit instead of one call per position
                                def ids = CQI.cpos2Str(word_prop.getQualifiedName(), pos)
                                def q = ids.collect { "["+word_prop+"=\""+it+"\"]" }.join(" ")

                                if (first) { first = false } else { print "|" }
                                print "("+q+")"
                        } else {
                                String forme = null;
                                if (word_prop == null) { // word_property is the analec unit property to use
                                        forme = unit.getProp(word_property)
                                } else {
                                        int[] pos = (unit.getDeb()..unit.getFin())
                                        forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ")
                                }

                                if (first) { first = false } else { print separator }
                                print forme
                        }
                }
                println ""
        }
}