Statistics
| Revision:

root / tmp / org.txm.analec.rcp / src / org / txm / macro / urs / check / CheckDuplicatesInSchemasMacro.groovy @ 2144

History | View | Annotate | Download (3.4 kB)

1
package org.txm.macro.urs.check
2

    
3
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
4
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
5
// @author mdecorde
6
// @author sheiden
7
// STANDARD DECLARATIONS
8

    
9
import groovy.transform.Field
10

    
11
import org.jfree.chart.JFreeChart
12
import org.kohsuke.args4j.*
13
import org.txm.Toolbox
14
import org.txm.annotation.urs.*
15
import org.txm.macro.urs.AnalecUtils
16
import org.txm.rcp.Application
17
import org.txm.rcp.IImageKeys
18
import org.txm.rcp.swt.widget.parameters.*
19
import org.txm.searchengine.cqp.CQPSearchEngine
20
import org.txm.searchengine.cqp.corpus.*
21
import org.txm.searchengine.cqp.corpus.query.CQLQuery
22

    
23
import visuAnalec.elements.*
24

    
25
// Simple class name of this script, used as a prefix in the console message below
def scriptName = this.class.getSimpleName()

// The macro needs a CQP corpus as current selection: stop right away otherwise
def corpusIsSelected = corpusViewSelection instanceof CQPCorpus
if (!corpusIsSelected) {
        println "** $scriptName please select a Corpus to run the macro"
        return
}
31

    
32
// BEGINNING OF PARAMETERS

// Schema selector, written as TYPE@PROP=VALUE
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
        String schema_ursql

// Name of the schema property printed next to each schema in the final report
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
        String schema_property_display

// Verbosity level, received as one of the labels listed in metaVar
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
        debug

// Open the parameters dialog; stop when open() returns false
if (!ParametersDialog.open(this)) return

// Replace the textual debug label by its numeric level; any other value is left untouched
switch (debug) {
        case "OFF":
                debug = 0
                break
        case "ON":
                debug = 1
                break
        case "ALL":
                debug = 2
                break
        case "REALLY ALL":
                debug = 3
                break
}
41

    
42
// The selected corpus (checked earlier to be a CQPCorpus)
def corpus = corpusViewSelection

// CQi client of the CQP search engine, passed to AnalecUtils.toString when printing units
def CQI = CQPSearchEngine.getCqiClient()

// Word property of the corpus, also passed to AnalecUtils.toString
def word = corpus.getWordProperty()

// URS corpus obtained from URSCorpora for the selected corpus
def analecCorpus = URSCorpora.getCorpus(corpus)
48

    
49

    
50

    
51
// Validate the schema selector. The returned collection is reported as the list of
// types for which the URSQL cannot be computed; a non-empty result aborts the macro.
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
if (errors.size() > 0) {
        println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
        return;
}

// Validate the display property, only when one was given
if (schema_property_display.length() > 0) {
        // Keep the returned collection itself (not its size) so that the message below
        // lists the offending types instead of printing a bare count.
        // NOTE(review): the lookup is done on Unite.class while the message and the later
        // schema.getProp(schema_property_display) call refer to Schemas - confirm the intended class.
        errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(schema_ursql)[0], schema_property_display)
        if (errors.size() > 0) {
                println "Error: some Schema types don't contain the $schema_property_display property: $errors"
                return
        }
}
64

    
65
// Select the schemas matching the schema_ursql selector
def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus , schema_ursql, -1, Integer.MAX_VALUE, false)
if (allSchemas.size() < 1) {
        println "No schema match for '$schema_ursql' selection. Aborting"
        return
}

// Group the units of the selected schemas; the result is indexed by schema further down
def allUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas)
if (allUnits.size() < 1) {
        println "No unit selection. Aborting"
        return
}

if (debug) {
        println "allUnits=${allUnits.size()}"
}
82

    
83
// Invert the grouping: for every unit, collect the schemas it was found in
def duplicates = [:]
for (Schema schema : allSchemas) {
        for (def unit : allUnits[schema]) {
                def owners = duplicates[unit]
                if (owners == null) {
                        // first time this unit is met: start its list of schemas
                        owners = []
                        duplicates[unit] = owners
                }
                owners << schema
        }
}

// Keep only the units found in at least two schemas
duplicates = duplicates.findAll { unit, owners -> owners.size() >= 2 }
97

    
98
// Print the outcome on the console: either a "nothing found" line, or each duplicated
// unit followed by the schemas it belongs to (display property, then all properties)
if (duplicates.size() == 0) {
        println "No duplicates found in $schema_ursql schema units"
} else {
        println "${duplicates.size()} duplicates found"
        for (def entry : duplicates.entrySet()) {
                println AnalecUtils.toString(CQI, word, entry.getKey())+" in: "
                for (Schema schema : entry.getValue()) {
                        println " '"+schema.getProp(schema_property_display)+"'\t"+schema.getProps()
                }
        }
}

// The map of duplicated units (unit -> schemas) is the result of the macro
return duplicates