Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / analec / misc / SchemasListMacro.groovy @ 1166

History | View | Annotate | Download (5.2 kB)

1 671 mdecorde
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2 671 mdecorde
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3 671 mdecorde
// @author mdecorde
4 671 mdecorde
// @author sheiden
5 671 mdecorde
6 671 mdecorde
// STANDARD DECLARATIONS
7 671 mdecorde
package org.txm.macro.analec.misc
8 671 mdecorde
9 671 mdecorde
import org.kohsuke.args4j.*
10 671 mdecorde
import groovy.transform.Field
11 671 mdecorde
import org.txm.rcp.swt.widget.parameters.*
12 671 mdecorde
import org.txm.analec.*
13 671 mdecorde
import org.txm.searchengine.cqp.corpus.*
14 671 mdecorde
import org.txm.Toolbox
15 671 mdecorde
import org.txm.rcp.commands.*
16 671 mdecorde
import org.apache.commons.lang.StringUtils
17 671 mdecorde
18 671 mdecorde
// BEGINNING OF PARAMETERS
19 671 mdecorde
// Macro parameters. Each @Field/@Option pair declares a script field that is
// shown in the dialog opened by ParametersDialog.open(this) further down.

// Type of the Analec schemas to list.
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="Coréférence")
20 671 mdecorde
String schema_type
21 671 mdecorde
22 671 mdecorde
// A schema is printed only if at least this many of its units pass the unit filters.
@Field @Option(name="minimum_schema_size",usage="", widget="Integer", required=true, def="3")
23 671 mdecorde
int minimum_schema_size
24 671 mdecorde
25 671 mdecorde
// Optional schema filter: name of the schema property to test.
// The filter is applied only when both the name and the value are non-empty.
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="")
26 671 mdecorde
String schema_property_name
27 671 mdecorde
28 671 mdecorde
// Optional schema filter: regular expression that the whole property value
// must match (it is passed to String.matches).
@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*")
29 671 mdecorde
String schema_property_value
30 671 mdecorde
31 671 mdecorde
// Type of the units counted and printed for each schema; units of another type are skipped.
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="Maillon")
32 671 mdecorde
String unit_type
33 671 mdecorde
34 671 mdecorde
// Optional unit filter: name of the unit property to test.
// The filter is applied only when both the name and the value are non-empty.
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="")
35 671 mdecorde
String unit_property_name
36 671 mdecorde
37 671 mdecorde
// Optional unit filter: regular expression that the whole property value
// must match (it is passed to String.matches).
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*")
38 671 mdecorde
String unit_property_value
39 671 mdecorde
40 671 mdecorde
// Word property whose values are printed for each unit. When the corpus has no
// property with this name, the name is read as a property of the unit itself.
// Not used when buildCQL is set: the "id" word property is used instead.
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word        lemma        frlemma        frolemma        #forme#        id", required=false, def="word")
41 671 mdecorde
String word_property
42 671 mdecorde
43 671 mdecorde
// Text printed between two units of the same schema (plain listing only).
@Field @Option(name="separator", usage="", widget="String", required=true, def=", ")
44 671 mdecorde
String separator
45 671 mdecorde
46 671 mdecorde
// When true, each unit is printed as a parenthesised sequence of [id="..."]
// tokens and the units of a schema are joined with "|" instead of the separator.
@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false')
47 671 mdecorde
def buildCQL
48 671 mdecorde
49 671 mdecorde
50 671 mdecorde
// The macro only runs when the current selection is a MainCorpus.
def selectionIsMainCorpus = corpusViewSelection instanceof MainCorpus
if (!selectionIsMainCorpus) {
        println "Corpus view selection is not a Corpus"
        return
}

// Show the parameters dialog; stop when open() does not return a true value.
def dialogAccepted = ParametersDialog.open(this)
if (!dialogAccepted) return
// END OF PARAMETERS

// Analec annotations attached to the selected corpus.
MainCorpus corpus = corpusViewSelection
def analecCorpus = AnalecCorpora.getCorpus(corpus)
60 671 mdecorde
61 671 mdecorde
// Validate the schema and unit parameters against the annotation structure of
// the corpus before listing anything. Each failed check prints a message and
// ends the macro.
def structure = analecCorpus.getStructure()

// check Schema parameters
if (!structure.getSchemas().contains(schema_type)) {
        println "No schema with name=$schema_type"
        return
}
// The schema property filter is only active when both its name and its value
// are set (Groovy truth: neither null nor empty), so only then test that the
// property exists.
if (schema_property_name && schema_property_value
                && !structure.getSchemaProperties(schema_type).contains(schema_property_name)) {
        println "Schema $schema_type has no property named $schema_property_name"
        return
}

// check unit parameters
// An empty unit_type means "no unit type filter" in the listing loops, so the
// type and its properties are only checked when a type was actually given.
if (unit_type) {
        if (!structure.getUnites().contains(unit_type)) {
                println "No unit with name=$unit_type"
                return
        }
        // test property existence
        if (unit_property_name && unit_property_value
                        && !structure.getUniteProperties(unit_type).contains(unit_property_name)) {
                println "Unit $unit_type has no property named $unit_property_name"
                return
        }
}
90 671 mdecorde
91 788 mdecorde
// CQi client used to read word property values at corpus positions.
// The class name is fully qualified because the file imports only
// org.txm.searchengine.cqp.corpus.*, which does not cover the parent package.
// NOTE(review): confirm the package of CQPSearchEngine for the TXM version in use.
def CQI = org.txm.searchengine.cqp.CQPSearchEngine.getCqiClient()

// CQL output always identifies words by their "id" property; the plain listing
// uses the property chosen in the dialog. getProperty may return null, which
// the plain listing handles by reading word_property from the unit instead.
// word_prop is deliberately left undeclared: it is stored in the script binding.
word_prop = corpus.getProperty(buildCQL ? "id" : word_property)
98 671 mdecorde
99 671 mdecorde
// List every schema of the requested type that passes the schema filter and
// keeps at least minimum_schema_size units after the unit filters. One line is
// printed per schema: its properties, ": ", then its units.

// sort(false) orders a copy, so the list handed out by the Analec corpus is
// not reordered as a side effect of running the macro.
def schemas = analecCorpus.getSchemas(schema_type).sort(false) { it.getProps() }
def nSchemas = 0 // number of schemas printed

def lens = [:] // not used by the visible part of the script

// A property filter is active only when both its name and value are set.
// The regular expressions are compiled once here instead of once per element.
def schemaFilterActive = schema_property_name && schema_property_value
def schemaValuePattern = schemaFilterActive ? ~schema_property_value : null
def unitFilterActive = unit_property_name && unit_property_value
def unitValuePattern = unitFilterActive ? ~unit_property_value : null

// Returns the units of a schema that pass the unit type and unit property filters.
def selectedUnits = { schema ->
        schema.getUnitesSousjacentes().findAll { unit ->
                // an empty unit_type disables the type filter
                if (unit_type && unit.getType() != unit_type) {
                        return false
                }
                if (unitFilterActive) {
                        def value = unit.getProp(unit_property_name)
                        // ignoring this unit: property missing or value not matching
                        if (value == null || !(value ==~ unitValuePattern)) {
                                return false
                        }
                }
                return true
        }
}

// Returns the text printed for one unit.
def renderUnit = { unit ->
        if (!buildCQL && word_prop == null) {
                // word_property is the analec unit property to use
                return unit.getProp(word_property)
        }

        // corpus positions covered by the unit, from getDeb() to getFin() inclusive
        int[] positions = (unit.getDeb()..unit.getFin())
        // a single call returns the values of all the positions of the unit
        def values = CQI.cpos2Str(word_prop.getQualifiedName(), positions)

        if (buildCQL) {
                // one [property="value"] token per position, separated by spaces
                // NOTE(review): the property is concatenated through its toString();
                // presumably that yields the property name -- confirm.
                def tokens = values.collect { "[" + word_prop + "=\"" + it + "\"]" }
                return "(" + tokens.join(" ") + ")"
        }
        return StringUtils.join(values, " ")
}

for (def schema : schemas) {

        if (schemaFilterActive) {
                def value = schema.getProp(schema_property_name)
                // ignoring this schema: property missing or value not matching
                if (value == null || !(value ==~ schemaValuePattern)) {
                        continue
                }
        }

        def units = selectedUnits(schema)
        if (units.size() < minimum_schema_size) continue

        print schema.getProps().toString() + ": "
        // CQL alternatives are joined with "|", plain forms with the separator
        println units.collect(renderUnit).join(buildCQL ? "|" : separator)

        nSchemas++
}