Révision 2085
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/exploit/NatureOfTheFirstUnitMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
|
|
19 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
20 |
println "Corpora selection is not a Corpus" |
|
21 |
return; |
|
22 |
} |
|
23 |
|
|
24 |
// BEGINNING OF PARAMETERS |
|
25 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
26 |
String schema_ursql |
|
27 |
|
|
28 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
29 |
int minimum_schema_size |
|
30 |
|
|
31 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
32 |
String unit_ursql |
|
33 |
|
|
34 |
@Field @Option(name="word_property", usage="", widget="String", required=false, def="CATEGORIE") |
|
35 |
String word_property |
|
36 |
|
|
37 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
38 |
debug |
|
39 |
|
|
40 |
if (!ParametersDialog.open(this)) return; |
|
41 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
42 |
|
|
43 |
|
|
44 |
CQPCorpus corpus = corpusViewSelection |
|
45 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
46 |
|
|
47 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
48 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
49 |
return; |
|
50 |
} |
|
51 |
|
|
52 |
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) { |
|
53 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus." |
|
54 |
return; |
|
55 |
} |
|
56 |
|
|
57 |
def CQI = CQPSearchEngine.getCqiClient() |
|
58 |
|
|
59 |
def prop = corpus.getProperty(word_property) |
|
60 |
|
|
61 |
// Select the schemas matching the URSQL query that hold at least
// minimum_schema_size units (999999 is used as the maximum size).
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)

// value of the first retained unit of a schema -> number of schemas with that value
def freqs = [:]

for (def schema : schemas) {

	def units = AnalecUtils.filterElements(debug, schema.getUnitesSousjacentesNonTriees(), unit_ursql)
	if (units.size() == 0) continue // no unit matches unit_ursql in this schema

	// NOTE(review): the units come from getUnitesSousjacentesNonTriees(); confirm that
	// filterElements() returns them in text order, otherwise units[0] may not be the first mention.
	def unit = units[0]

	String forme
	if (prop == null) { // word_property is not a CQP property: read it as an Analec unit property
		forme = unit.getProp(word_property)
	} else { // word_property is a CQP property: join its values over the positions of the unit
		int[] pos = (unit.getDeb()..unit.getFin()) as int[]
		forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ")
	}

	freqs[forme] = (freqs[forme] ?: 0) + 1
}

println "Index des natures de premier maillon :"

// Print the index by decreasing frequency and remember the most frequent value.
int max = 0
def result = ""
for (def forme : freqs.keySet().sort { -freqs[it] }) {
	def count = freqs[forme]
	println "$forme\t"+count
	if (max < count) {
		max = count
		result = "$forme: "+count
	}
}

// Value returned by the macro: the most frequent value and the full frequency table.
["result": result, "data": freqs]
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/exploit/NumberOfSchemaMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.macro.urs.AnalecUtils |
|
15 |
|
|
16 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
17 |
println "Corpora selection is not a Corpus" |
|
18 |
return; |
|
19 |
} |
|
20 |
|
|
21 |
// BEGINNING OF PARAMETERS |
|
22 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
23 |
String schema_ursql |
|
24 |
|
|
25 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
26 |
int minimum_schema_size |
|
27 |
|
|
28 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
29 |
debug |
|
30 |
|
|
31 |
if (!ParametersDialog.open(this)) return; |
|
32 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
33 |
|
|
34 |
|
|
35 |
CQPCorpus corpus = corpusViewSelection |
|
36 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
37 |
|
|
38 |
// check Schema parameters |
|
39 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
40 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
41 |
return; |
|
42 |
} |
|
43 |
|
|
44 |
// Count the schemas selected by the URSQL query and the minimum size,
// print the count and return it together with the selected schemas.
def schemas = AnalecUtils.selectSchemasInCorpus(
		debug, analecCorpus, corpus,
		schema_ursql, minimum_schema_size, 999999)

int nSchemas = schemas.size()
println "Nombre de chaînes de référence d'un texte : " + nSchemas

return ["result": nSchemas, "data": schemas]
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/exploit/GrammaticalCategoryMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.macro.urs.AnalecUtils |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.rcp.swt.widget.parameters.* |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.searchengine.cqp.* |
|
16 |
import org.txm.searchengine.cqp.corpus.* |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
|
|
19 |
// BEGINNING OF PARAMETERS |
|
20 |
|
|
21 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
|
22 |
String schema_ursql |
|
23 |
|
|
24 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
25 |
int minimum_schema_size |
|
26 |
|
|
27 |
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF") |
|
28 |
String schema_display_property_name |
|
29 |
|
|
30 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
|
31 |
String unit_ursql |
|
32 |
|
|
33 |
@Field @Option(name="property", usage="", widget="String", required=false, def="CATEGORIE") |
|
34 |
String property |
|
35 |
|
|
36 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
37 |
debug |
|
38 |
|
|
39 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
40 |
println "Corpora selection is not a Corpus" |
|
41 |
return; |
|
42 |
} |
|
43 |
|
|
44 |
// Open the parameters input dialog box |
|
45 |
if (!ParametersDialog.open(this)) return; |
|
46 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
47 |
|
|
48 |
// END OF PARAMETERS |
|
49 |
|
|
50 |
// The selection guard accepts any CQPCorpus, so the variable is typed CQPCorpus as well:
// typing it MainCorpus made the macro fail with a cast error on a sub-corpus selection.
CQPCorpus corpus = corpusViewSelection
// Analec (URS) annotations attached to the selected corpus
def analecCorpus = URSCorpora.getCorpus(corpus)
|
52 |
|
|
53 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
54 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
55 |
return; |
|
56 |
} |
|
57 |
|
|
58 |
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) { |
|
59 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus." |
|
60 |
return; |
|
61 |
} |
|
62 |
|
|
63 |
def CQI = CQPSearchEngine.getCqiClient() |
|
64 |
|
|
65 |
// CQP property named by the 'property' parameter; when null, 'property' is read
// as an Analec unit property instead.
def prop = corpus.getProperty(property)

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)

// Orders index entries by decreasing frequency, then by increasing value.
def byFreqThenKey = { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }

// value -> frequency over all the selected schemas
def allFreqs = [:]
def n = 0
for (def schema : schemas) {
	n++

	// value -> frequency in the current schema
	def freqs = [:]

	def units = AnalecUtils.filterElements(debug, schema.getUnitesSousjacentesNonTriees(), unit_ursql)

	for (def unit : units) { // no need to sort units

		String forme
		if (prop == null) { // property is the analec unit property to use
			forme = unit.getProp(property)
		} else { // join the CQP property values over the positions of the unit
			int[] pos = (unit.getDeb()..unit.getFin()) as int[]
			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ")
		}

		freqs[forme] = (freqs[forme] ?: 0) + 1
		allFreqs[forme] = (allFreqs[forme] ?: 0) + 1
	}

	// Per-schema index, titled with the display property when one is given.
	if (schema_display_property_name != null) {
		println "Index des natures de $unit_ursql de '"+schema.getProp(schema_display_property_name)+"' : "
	} else {
		println "Index des natures de $schema_ursql - $n : "
	}

	for (def forme : freqs.sort(byFreqThenKey)) {
		println forme.key+"\t"+forme.value
	}
}

// Global index; the most frequent value is kept as the macro result.
int max = 0
def result = ""

println "Index des natures de $schema_ursql : "
for (def forme : allFreqs.sort(byFreqThenKey)) {
	println forme.key+"\t"+forme.value
	if (max < forme.value) {
		max = forme.value
		// use the entry key: interpolating the whole entry produced "key=value: value"
		result = "${forme.key}: "+forme.value
	}
}

return ["result":result, "data":allFreqs]
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitTypesNotInSchemaMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
15 |
println "Corpora selection is not a Corpus" |
|
16 |
return; |
|
17 |
} |
|
18 |
|
|
19 |
// BEGINNING OF PARAMETERS |
|
20 |
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE") |
|
21 |
String schema_type |
|
22 |
if (!ParametersDialog.open(this)) return; |
|
23 |
|
|
24 |
MainCorpus corpus = corpusViewSelection |
|
25 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
|
26 |
|
|
27 |
// Collect every unit that belongs to at least one schema of the requested type.
def unitesInSchema = new HashSet()
for (def schema : analecCorpus.getSchemas(schema_type)) {
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}

def allUnites = analecCorpus.getToutesUnites()
println "unites: "+allUnites.size()
println "unites in schema: "+unitesInSchema.size()

// unit type -> number of units of that type that are in no schema of the requested type
def countsByType = new HashMap()
for (def u : allUnites) {
	if (unitesInSchema.contains(u)) continue

	def type = u.getType()
	countsByType[type] = (countsByType[type] ?: 0) + 1
}

// Map.sort(Closure) hands Map.Entry objects to the closure, so sort on the entry value
// (the original looked the entry up as a key, got null for every entry and sorted nothing).
println "unites not in schema: "+countsByType.sort { it.value }
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitsProgressionMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import java.util.ArrayList; |
|
9 |
import java.util.List; |
|
10 |
|
|
11 |
import org.apache.commons.lang.StringUtils |
|
12 |
import org.jfree.chart.JFreeChart |
|
13 |
import org.jfree.chart.plot.XYPlot |
|
14 |
import org.kohsuke.args4j.* |
|
15 |
|
|
16 |
import groovy.transform.Field |
|
17 |
|
|
18 |
import org.txm.Toolbox |
|
19 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
|
20 |
import org.txm.progression.core.functions.Progression |
|
21 |
import org.txm.rcp.swt.widget.parameters.* |
|
22 |
import org.txm.annotation.urs.* |
|
23 |
import org.txm.chartsengine.rcp.editors.ChartEditor |
|
24 |
import org.txm.macro.urs.AnalecUtils |
|
25 |
import org.txm.searchengine.cqp.AbstractCqiClient |
|
26 |
import org.txm.searchengine.cqp.corpus.* |
|
27 |
import org.txm.searchengine.cqp.corpus.query.Match; |
|
28 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
29 |
import org.txm.rcp.Application |
|
30 |
import org.txm.rcp.IImageKeys |
|
31 |
|
|
32 |
import visuAnalec.donnees.Structure |
|
33 |
import visuAnalec.elements.* |
|
34 |
|
|
35 |
def scriptName = this.class.getSimpleName() |
|
36 |
def parent |
|
37 |
def selection = [] |
|
38 |
// The macro needs a corpus: stop here when the selection is anything else,
// instead of carrying on and failing later on an object that is not a corpus.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "** $scriptName please select a Corpus to run the macro"
	return false
}
|
41 |
selection << corpusViewSelection |
|
42 |
parent = corpusViewSelection |
|
43 |
|
|
44 |
// BEGINNING OF PARAMETERS |
|
45 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE") |
|
46 |
String schema_ursql |
|
47 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
|
48 |
int minimum_schema_size |
|
49 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
|
50 |
int maximum_schema_size |
|
51 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
|
52 |
String unit_ursql |
|
53 |
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
54 |
int limit_distance_in_schema |
|
55 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div") |
|
56 |
limit_cql |
|
57 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
58 |
boolean strict_inclusion |
|
59 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
60 |
int limit_distance |
|
61 |
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=true, def="CATEGORIE") |
|
62 |
String unit_property_display |
|
63 |
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div") |
|
64 |
String struct_name |
|
65 |
@Field @Option(name="struct_prop", usage="Structure property to display", widget="String", required=true, def="n") |
|
66 |
String struct_prop |
|
67 |
@Field @Option(name="line_width", usage="line width", widget="Integer", required=true, def="1") |
|
68 |
int line_width = 2 |
|
69 |
@Field @Option(name="bande_width", usage="bande width", widget="Float", required=true, def="1.0f") |
|
70 |
float bande_width = 1.0f |
|
71 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
72 |
debug |
|
73 |
if (!ParametersDialog.open(this)) return |
|
74 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
75 |
|
|
76 |
|
|
77 |
// CQPSearchEngine is not imported by this script: use its fully qualified name.
def CQI = org.txm.searchengine.cqp.CQPSearchEngine.getCqiClient()

// Parallel lists, one entry per progression series:
def queries = []       // the query used to label the series
def queryResults = []  // the fake query result holding the positions of the units
def informations = []  // the textual description of each unit of the series
for (def corpus : selection) {

	mainCorpus = corpus.getMainCorpus()

	def word = mainCorpus.getWordProperty()
	def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName())

	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size,
			unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance)

	// Registers one series: stores the start/end positions of the given units in a
	// FakeQueryResult and appends the query, the result and the unit descriptions.
	def addSeries = { seriesQuery, seriesUnits ->
		int[] starts = new int[seriesUnits.size()]
		int[] ends = new int[seriesUnits.size()]
		def unitsinformations = []
		int n = 0
		for (Unite unite : seriesUnits) {
			starts[n] = unite.getDeb()
			ends[n] = unite.getFin()
			unitsinformations << AnalecUtils.toString(CQI, word, unite)
			n++
		}
		queries << seriesQuery
		queryResults << new FakeQueryResult(corpus.getID(), corpus, seriesQuery, starts, ends, null)
		informations << unitsinformations
	}

	// First series: all the selected units, labelled with a summary of the selection parameters.
	def label = ""
	if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) label += limit_cql
	if (schema_ursql != null && schema_ursql.length() > 0) {
		if (label.length() > 0) label += " & "
		label += ""+schema_ursql+ " >"
	}
	if (unit_ursql != null && unit_ursql.length() > 0) label += " "+unit_ursql
	addSeries(new CQLQuery(label), selectedUnits)

	// Then one series per value of the displayed unit property.
	if (unit_property_display != null && unit_property_display.length() > 0) {
		def propvalues = [:]
		for (def unit : selectedUnits) {
			def v = unit.getProp(unit_property_display)
			if (v == null) v = "<null>"
			else if (v.length() == 0) v = "<empty>"

			if (!propvalues.containsKey(v)) propvalues[v] = []
			propvalues[v] << unit
		}

		for (def v : propvalues.keySet().sort()) {
			// the series is labelled with the property value itself
			addSeries(new CQLQuery(v), propvalues[v])
		}
	}
}
|
144 |
|
|
145 |
corpus = parent |
|
146 |
try { |
|
147 |
def struct = corpus.getStructuralUnit(struct_name) |
|
148 |
def struct_p = struct.getProperty(struct_prop) |
|
149 |
|
|
150 |
Progression progression = new Progression(corpus, queries, |
|
151 |
struct, struct_p, ".*", |
|
152 |
true, false, false, |
|
153 |
line_width, false, bande_width) |
|
154 |
|
|
155 |
progression.stepQueries(queryResults); // new |
|
156 |
|
|
157 |
if (!progression.stepStructuralUnits() || monitor.isCanceled()) return |
|
158 |
monitor.worked(20) |
|
159 |
if (!progression.stepFinalize() || monitor.isCanceled()) return |
|
160 |
monitor.worked(20) |
|
161 |
|
|
162 |
monitor.syncExec(new Runnable() { |
|
163 |
@Override |
|
164 |
public void run() { |
|
165 |
try { |
|
166 |
ChartEditor charteditorpart = SWTChartsComponentProvider.openEditor(Application.swtComponentProvider.createProgressionChartEditorPart(IImageKeys.getImage(IImageKeys.ACTION_PROGRESSION), progression, progression.isMonochrome(), progression.isMonostyle(), progression.isDoCumulative())) |
|
167 |
JFreeChart chart = charteditorpart.getChart() |
|
168 |
def plot = chart.getXYPlot() |
|
169 |
ProgressionItemSelectionRenderer renderer = plot.getRenderer(); |
|
170 |
renderer.setAdditionalLabelInformation(informations) |
|
171 |
} catch(Exception e) {e.printStackTrace()} |
|
172 |
} |
|
173 |
}) |
|
174 |
|
|
175 |
} catch(Exception e) { |
|
176 |
e.printStackTrace() |
|
177 |
return false |
|
178 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/SchemasListOldMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macroprototypes.urs.misc |
|
8 |
|
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.Toolbox |
|
15 |
import org.txm.rcp.commands.* |
|
16 |
import org.apache.commons.lang.StringUtils |
|
17 |
|
|
18 |
// BEGINNING OF PARAMETERS |
|
19 |
// Type of the schemas to list. The default was misspelled "CHAIE"; the other URS macros
// use "CHAINE" as the default schema type.
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
String schema_type
|
21 |
|
|
22 |
@Field @Option(name="minimum_schema_size",usage="", widget="Integer", required=true, def="3") |
|
23 |
int minimum_schema_size |
|
24 |
|
|
25 |
@Field @Option(name="schema_property_name",usage="", widget="String", required=false, def="") |
|
26 |
String schema_property_name |
|
27 |
|
|
28 |
@Field @Option(name="schema_property_value",usage="", widget="String", required=false, def=".*") |
|
29 |
String schema_property_value |
|
30 |
|
|
31 |
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="MENTION") |
|
32 |
String unit_type |
|
33 |
|
|
34 |
@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="") |
|
35 |
String unit_property_name |
|
36 |
|
|
37 |
@Field @Option(name="unit_property_value", usage="", widget="String", required=false, def=".*") |
|
38 |
String unit_property_value |
|
39 |
|
|
40 |
@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word lemma frlemma frolemma #forme# id", required=false, def="word") |
|
41 |
String word_property |
|
42 |
|
|
43 |
@Field @Option(name="separator", usage="", widget="String", required=true, def=", ") |
|
44 |
String separator |
|
45 |
|
|
46 |
@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false') |
|
47 |
def buildCQL |
|
48 |
|
|
49 |
if (!(corpusViewSelection instanceof MainCorpus)) { |
|
50 |
println "Corpus view selection is not a Corpus" |
|
51 |
return; |
|
52 |
} |
|
53 |
|
|
54 |
if (!ParametersDialog.open(this)) return; |
|
55 |
// END OF PARAMETERS |
|
56 |
|
|
57 |
MainCorpus corpus = corpusViewSelection |
|
58 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
59 |
|
|
60 |
// check Schema parameters |
|
61 |
if (!analecCorpus.getStructure().getSchemas().contains(schema_type)) { |
|
62 |
println "No schema with name=$schema_type" |
|
63 |
return; |
|
64 |
} else { |
|
65 |
if (schema_property_name.length() > 0 && schema_property_value.length() > 0) { |
|
66 |
// test property existance |
|
67 |
def props = analecCorpus.getStructure().getSchemaProperties(schema_type); |
|
68 |
if (!props.contains(schema_property_name)) { |
|
69 |
println "Schema $schema_type has no property named $schema_property_name" |
|
70 |
return; |
|
71 |
} |
|
72 |
} |
|
73 |
} |
|
74 |
|
|
75 |
// check unit parameters |
|
76 |
if (!analecCorpus.getStructure().getUnites().contains(unit_type)) { |
|
77 |
println "No unit with name=$unit_type" |
|
78 |
return; |
|
79 |
} else { |
|
80 |
if (unit_property_name.length() > 0 && unit_property_value.length() > 0) { |
|
81 |
// test property existance |
|
82 |
def props = analecCorpus.getStructure().getUniteProperties(unit_type); |
|
83 |
if (!props.contains(unit_property_name)) { |
|
84 |
println "Unit $unit_type has no property named $unit_property_name" |
|
85 |
return; |
|
86 |
} |
|
87 |
} |
|
88 |
} |
|
89 |
|
|
90 |
// CQPSearchEngine is not imported by this script: use its fully qualified name.
def CQI = org.txm.searchengine.cqp.CQPSearchEngine.getCqiClient()

// CQP property used to render the units: always "id" when building CQL queries.
// May be null, in which case word_property is read as an Analec unit property.
def word_prop = buildCQL ? corpus.getProperty("id") : corpus.getProperty(word_property)

def schemas = analecCorpus.getSchemas(schema_type)
schemas.sort { it.getProps() }

// Which optional filters are active
boolean filterSchemaProp = schema_property_name.length() > 0 && schema_property_value.length() > 0
boolean filterUnitType = unit_type.length() > 0
boolean filterUnitProp = unit_property_name.length() > 0 && unit_property_value.length() > 0

// True when the unit passes the type filter and the property regex filter.
// A unit without the filtered property is rejected instead of raising a NullPointerException.
def keepUnit = { unit ->
	if (filterUnitType && !unit.getType().equals(unit_type)) return false
	if (filterUnitProp) {
		def value = unit.getProp(unit_property_name)
		if (value == null || !value.matches(unit_property_value)) return false
	}
	return true
}

for (def schema : schemas) {

	if (filterSchemaProp) {
		def value = schema.getProp(schema_property_name)
		if (value == null || !value.matches(schema_property_value)) {
			// ignoring this schema
			continue
		}
	}

	// Filter the units once; the same list is used for the size test and for the display.
	def units = schema.getUnitesSousjacentes().findAll(keepUnit)
	if (units.size() < minimum_schema_size) continue

	print schema.getProps().toString()+ ": "
	def first = true
	for (def unit : units) {

		if (buildCQL) {
			// one "[prop="value"]" token per position of the unit, joined with spaces
			int[] pos = (unit.getDeb()..unit.getFin()) as int[]
			def tokens = []
			for (int p : pos) {
				int[] single = [p]
				// NOTE(review): word_prop is concatenated through its toString(); confirm it yields the property name.
				tokens << "["+word_prop+"=\""+CQI.cpos2Str(word_prop.getQualifiedName(), single)[0]+"\"]"
			}
			if (first) { first = false } else { print "|" }
			print "("+tokens.join(" ")+")"
		} else {
			String forme
			if (word_prop == null) { // word_property is the analec unit property to use
				forme = unit.getProp(word_property)
			} else {
				int[] pos = (unit.getDeb()..unit.getFin()) as int[]
				forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough
			}

			if (first) { first = false } else { print separator }
			print forme
		}
	}
	println ""
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/EmptyPropValuesMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
|
|
11 |
import groovy.transform.Field |
|
12 |
|
|
13 |
import org.txm.Toolbox |
|
14 |
import org.txm.rcp.swt.widget.parameters.* |
|
15 |
import org.txm.annotation.urs.* |
|
16 |
import org.txm.macro.urs.AnalecUtils |
|
17 |
import org.txm.searchengine.cqp.AbstractCqiClient |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery |
|
20 |
|
|
21 |
import visuAnalec.donnees.Structure |
|
22 |
import visuAnalec.elements.* |
|
23 |
|
|
24 |
def scriptName = this.class.getSimpleName() |
|
25 |
|
|
26 |
def selection = [] |
|
27 |
for (def s : corpusViewSelections) { |
|
28 |
if (s instanceof CQPCorpus) selection << s |
|
29 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
30 |
} |
|
31 |
|
|
32 |
if (selection.size() == 0) { |
|
33 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
34 |
return false |
|
35 |
} |
|
36 |
|
|
37 |
// BEGINNING OF PARAMETERS |
|
38 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
|
39 |
String schema_ursql |
|
40 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3") |
|
41 |
int minimum_schema_size |
|
42 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
|
43 |
String unit_ursql |
|
44 |
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div") |
|
45 |
limit_cql |
|
46 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true") |
|
47 |
boolean strict_inclusion |
|
48 |
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
|
49 |
int limit_distance |
|
50 |
@Field @Option(name="debug", usage="Show internal variable content", widget="Boolean", required=true, def="false") |
|
51 |
debug |
|
52 |
if (!ParametersDialog.open(this)) return |
|
53 |
|
|
54 |
def CQI = CQPSearchEngine.getCqiClient() |
|
55 |
|
|
56 |
//corpus = corpusViewSelection |
|
57 |
for (def corpus : selection) { |
|
58 |
|
|
59 |
mainCorpus = corpus.getMainCorpus() |
|
60 |
|
|
61 |
def word = mainCorpus.getWordProperty() |
|
62 |
def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName()) |
|
63 |
|
|
64 |
def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, Integer.MAX_VALUE, |
|
65 |
unit_ursql, limit_cql, strict_inclusion, limit_distance); |
|
66 |
|
|
67 |
for (def unit : selectedUnits) { |
|
68 |
def props = unit.getProps(); |
|
69 |
for (def k : props.keySet()) { |
|
70 |
if (props[k] == null) { |
|
71 |
println "$corpus\t"+unit.getDeb()+"->"+unit.getFin()+"\t"+k |
|
72 |
} |
|
73 |
} |
|
74 |
} |
|
75 |
} |
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitTypesMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
// Lists the unit types found in the schemas of a given type, with the number of
// distinct units per type, and reports units that belong to several schemas.

// The macro needs a main corpus selection to reach the URS annotations
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="MENTION")
String schema_type

if (!ParametersDialog.open(this)) return

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// Gather the units underlying every schema of the requested type (duplicates kept on purpose)
def unitesInSchema = []
for (def schema : analecCorpus.getSchemas(schema_type)) {
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}

// A unit seen more than once is referenced by several schemas: report it
unitesInSchema.countBy { it }.each { unit, count ->
	if (count > 1) println "ERROR UNIT IN MULTIPLE SCHEMA["+unit.getDeb()+", "+unit.getFin()+"]="+unit.getProps()+" in "+unit.getSchemas().collect { it.getProps() }
}

// Count each distinct unit once, grouped by unit type
def map = new HashSet(unitesInSchema).countBy { it.getType() }

// Map.sort(Closure) hands a Map.Entry to a one-parameter closure: sort on the entry value (the count)
println "Unites types: "+map.sort { it.value }
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitsCorrelationMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils |
|
9 |
import org.txm.rcp.views.corpora.CorporaView |
|
10 |
import groovy.transform.Field |
|
11 |
|
|
12 |
import org.kohsuke.args4j.* |
|
13 |
import org.txm.Toolbox |
|
14 |
import org.txm.annotation.urs.* |
|
15 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl |
|
16 |
import org.txm.macro.urs.* |
|
17 |
import org.txm.rcp.commands.* |
|
18 |
import org.txm.rcp.swt.widget.parameters.* |
|
19 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
20 |
import org.txm.searchengine.cqp.corpus.* |
|
21 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
|
22 |
import org.txm.statsengine.r.core.RWorkspace |
|
23 |
|
|
24 |
import visuAnalec.donnees.* |
|
25 |
import visuAnalec.elements.* |
|
26 |
import cern.colt.matrix.DoubleFactory2D |
|
27 |
import cern.colt.matrix.DoubleMatrix2D |
|
28 |
|
|
29 |
// Selection check and parameter declarations of the units correlation macro.

def scriptName = this.class.getSimpleName()

// The rest of the script calls corpus.getMainCorpus() on the selection:
// stop right after the message instead of failing later on an unsupported selection.
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "** $scriptName please select a Corpus to run the macro"
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
String schema_ursql
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
int minimum_schema_size
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
int maximum_schema_size
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance_in_schema
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
limit_cql
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
boolean strict_inclusion
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
int limit_distance
@Field @Option(name="unit_prop1", usage="PROP1", widget="String", required=false, def="PROP1")
String unit_prop1
@Field @Option(name="unit_prop2", usage="PROP2", widget="String", required=false, def="PROP2")
String unit_prop2
@Field @Option(name="corr_method", usage="try them all", widget="StringArray", metaVar="pearson	spearman	kendall", required=false, def="pearson")
String corr_method
@Field @Option(name="corr_style", usage="try them all", widget="StringArray", metaVar="circle	square	ellipse	number	shade	color	pie", required=false, def="number")
String corr_style
@Field @Option(name="corr_layout", usage="try them all", widget="StringArray", metaVar="full	lower	upper", required=false, def="upper")
String corr_layout
@Field @Option(name="corr_order", usage="try them all", widget="StringArray", metaVar="AOE	FPC	hclust	alphabet", required=false, def="hclust")
String corr_order
@Field @Option(name="output_lexicaltable", usage="create or not a lexical table with the result", widget="Boolean", required=true, def="false")
output_lexicaltable
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
debug
if (!ParametersDialog.open(this)) return

// Turn the debug label chosen in the dialog into the numeric level passed to AnalecUtils
if (debug == "OFF") debug = 0
else if (debug == "ON") debug = 1
else if (debug == "ALL") debug = 2
else if (debug == "REALLY ALL") debug = 3
|
72 |
|
|
73 |
|
|
74 |
// Builds the contingency table of unit_prop1 values (rows) against unit_prop2 values (columns)
// over the selected units, prints it as tab-separated text and copies it into an int matrix.

def CQI = CQPSearchEngine.getCqiClient()
def corpus = corpusViewSelection
mainCorpus = corpus.getMainCorpus()
def word = mainCorpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(mainCorpus)

def correlations = [:]      // value1 -> (value2 -> number of units)
def values1 = new HashSet() // distinct row labels
def values2 = new HashSet() // distinct column labels

def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus,
		schema_ursql, minimum_schema_size, maximum_schema_size,
		unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance)

// Missing and empty property values get an explicit label so they show up in the table
def labelOf = { raw ->
	if (raw == null) return "<null>"
	return raw.length() == 0 ? "<empty>" : raw
}

selectedUnits.each { unit ->
	def value1 = labelOf(unit.getProp(unit_prop1))
	def value2 = labelOf(unit.getProp(unit_prop2))

	values1 << value1
	values2 << value2

	def line = correlations[value1]
	if (line == null) {
		line = [:]
		correlations[value1] = line
	}
	line[value2] = (line[value2] ?: 0) + 1
}

def matrix = new int[values1.size()][values2.size()]
println "\t"+values2.join("\t")
values1.eachWithIndex { rowLabel, i ->
	def line = correlations[rowLabel]
	print rowLabel
	values2.eachWithIndex { colLabel, j ->
		// pairs never observed are stored as 0 so the returned map is complete
		if (line[colLabel] == null) line[colLabel] = 0
		print "\t"+line[colLabel]
		matrix[i][j] = line[colLabel]
	}
	println ""
}
|
124 |
|
|
125 |
// Sends the contingency matrix to R and, unless a lexical table was requested,
// draws the correlation plot into a temporary SVG file of the TXM "results" directory.

def r = RWorkspace.getRWorkspaceInstance()
r.addVectorToWorkspace("corrlines", values1 as String[])
r.addVectorToWorkspace("corrcols", values2 as String[])
r.addMatrixToWorkspace("corrmatrix", matrix)
r.eval("rownames(corrmatrix) = corrlines")
r.eval("colnames(corrmatrix) = corrcols")

def resultsDir = new File(Toolbox.getTxmHomePath(), "results")
resultsDir.mkdirs()
file = File.createTempFile("txm_corr_pairs_", ".svg", resultsDir)

def title = "${corpus.getMainCorpus()}.${corpus}\n${unit_ursql}"
if (limit_distance > 1) title += "[${limit_distance}]."
if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) title += "\n(${limit_cql} limits)"
// both property names are interpolated (the second one used to be printed literally)
title += "\t P1=$unit_prop1 P2=$unit_prop2"

def plotScript = """

	r1 = cor(corrmatrix, use="complete.obs", method="$corr_method");
	r2 = cov(corrmatrix, use="complete.obs") ;

	corrplot(r1, type="$corr_layout", order="$corr_order", method="$corr_style")
"""

// execute R script
if (!output_lexicaltable) {
	try {
		// a failure here is reported as a missing 'corrplot' package
		r.eval("library(corrplot)")
		try {
			r.plot(file, plotScript)
		} catch (Exception e) {
			println "** Error: "+e
		}
	} catch (Exception e) {
		println "** The 'corrplot' R package is not installed. Start R ("+RWorkspace.getExecutablePath()+") and run 'install.packages(\"corrplot\");'."
	}
}
|
163 |
// Optionally stores the contingency table as a lexical table, then refreshes the UI:
// either the Corpora view (lexical table) or a browser on the plotted SVG file.

title = "$unit_prop1 $corr_method correlations"

def lt = null
if (output_lexicaltable) {
	def rowNames = values1 as String[]
	def colNames = values2 as String[]

	// copy the int contingency matrix into a dense Colt matrix
	mFactory = DoubleFactory2D.dense
	dmatrix = mFactory.make(values1.size(), values2.size())
	(0..<values1.size()).each { ii ->
		(0..<values2.size()).each { jj ->
			dmatrix.set(ii, jj, matrix[ii][jj])
		}
	}

	if (corpusViewSelection instanceof Partition) {
		def parentCorpus = corpusViewSelection.getCorpus()
		lt = new LexicalTableImpl(dmatrix, corpusViewSelection, parentCorpus.getProperty("word"), rowNames, colNames)
		lt.setCorpus(corpusViewSelection.getCorpus())
		corpusViewSelection.storeResult(lt)
	} else {
		lt = new LexicalTableImpl(dmatrix, corpus.getProperty("word"), rowNames, colNames)
		lt.setCorpus(corpus)
		corpus.storeResult(lt)
	}
}

// UI updates are handed to monitor.syncExec
monitor.syncExec({
	try {
		if (output_lexicaltable) {
			CorporaView.refreshObject(corpus)
			CorporaView.expand(lt)
		} else {
			OpenBrowser.openfile(file.getAbsolutePath(), "Correlations Units")
		}
	} catch (e) {
		e.printStackTrace()
	}
} as Runnable)

return correlations
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/SchemaTypesMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
// Prints the schema types of the selected corpus with the number of schemas of each type.

// The macro needs a main corpus selection to reach the URS annotations
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// schema type -> number of schemas of that type
def set = analecCorpus.getTousSchemas().countBy { it.getType() }

// Map.sort(Closure) hands a Map.Entry to a one-parameter closure: sort on the entry value (the count)
println "Schemas types: "+set.sort { it.value }
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/CompUnitPropertiesMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.Toolbox; |
|
12 |
import org.txm.rcp.swt.widget.parameters.* |
|
13 |
import org.txm.annotation.urs.* |
|
14 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
15 |
import org.txm.searchengine.cqp.corpus.* |
|
16 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
17 |
import visuAnalec.donnees.Structure; |
|
18 |
import visuAnalec.elements.Unite; |
|
19 |
|
|
20 |
// Compares the values of two properties on every unit of a given type and
// reports the units whose values differ, then a summary of the differences.

// The macro needs a main corpus selection to reach the URS annotations
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
String unit_type

@Field @Option(name="print_diff",usage="", widget="Boolean", required=true, def="true")
boolean print_diff

@Field @Option(name="unit_property_name1", usage="", widget="String", required=false, def="CATEGORIE")
String unit_property_name1

@Field @Option(name="unit_property_name2", usage="", widget="String", required=false, def="CATEGORIE_ORIG")
String unit_property_name2

if (!ParametersDialog.open(this)) return

MainCorpus corpus = corpusViewSelection
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient()
def word = corpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(corpus)

// process the units in text order (start position, then end position)
def units = analecCorpus.getUnites(unit_type)
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

int n = 0     // number of units examined so far; also the 1-based rank printed for a unit
int nDiff = 0 // number of units whose two property values differ
for (Unite unit : units) {
	n++
	def props = unit.getProps()
	if (props.get(unit_property_name1) != props.get(unit_property_name2)) {
		if (print_diff) {
			// the word forms are only fetched for the units that are actually printed
			int[] pos = (unit.getDeb()..unit.getFin())
			def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")
			println "$n - ${unit.getDeb()} -> ${unit.getFin()} - $props : $form"
		}
		nDiff++
	}
}

// n is now exactly the number of units compared (it used to be one too many)
if (nDiff == 0) println "$unit_property_name1 and $unit_property_name2 have the same values."
else println "$unit_property_name1 and $unit_property_name2 have $nDiff/$n different values."
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/RelationsListMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
import org.apache.tools.ant.types.resources.selectors.InstanceOf; |
|
10 |
import org.kohsuke.args4j.* |
|
11 |
|
|
12 |
import groovy.transform.Field |
|
13 |
|
|
14 |
import org.txm.Toolbox; |
|
15 |
import org.txm.rcp.swt.widget.parameters.* |
|
16 |
import org.txm.annotation.urs.* |
|
17 |
import org.txm.searchengine.cqp.AbstractCqiClient; |
|
18 |
import org.txm.searchengine.cqp.corpus.* |
|
19 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
20 |
|
|
21 |
import visuAnalec.donnees.Structure; |
|
22 |
import visuAnalec.elements.Relation |
|
23 |
import visuAnalec.elements.Unite; |
|
24 |
|
|
25 |
// Lists the relations of the selected corpus with their properties and,
// when both ends are units, the word forms of the two units.

// The macro needs a main corpus selection to reach the URS annotations
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="relation_type",usage="", widget="String", required=true, def="ANAPHORE")
String relation_type

if (!ParametersDialog.open(this)) return

MainCorpus corpus = corpusViewSelection
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient()
def word = corpus.getWordProperty()
visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(corpus)

// Only the relations of the requested type are listed; an empty type selects all relations.
// (The type used to be ignored: every relation type was collected whatever the parameter.)
def relations = relation_type.length() > 0 ? analecCorpus.getRelations(relation_type) : analecCorpus.getToutesRelations()

// Word forms covered by a unit, joined with spaces
def formOf = { Unite unit ->
	int[] pos = (unit.getDeb()..unit.getFin())
	return StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")
}

int n = 1 // 1-based rank of the relation in the listing
for (Relation relation : relations) {
	def unit1 = relation.getElt1()
	def unit2 = relation.getElt2()
	def props = relation.getProps()
	if (unit1 instanceof Unite && unit2 instanceof Unite) {
		def form1 = formOf(unit1)
		def form2 = formOf(unit2)
		println "$n - $props : $form1 -> $form2"
	} else {
		// at least one end is not a unit: only the properties are shown
		println "$n - $props"
	}
	n++
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/misc/UnitTypesInSchemaMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macroprototypes.urs.misc |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import org.txm.searchengine.cqp.corpus.* |
|
13 |
|
|
14 |
// Lists the types of the units found in the schemas of a given type, with the number
// of distinct units per type, and reports units that belong to several schemas.

// The macro needs a main corpus selection to reach the URS annotations
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
String schema_type

if (!ParametersDialog.open(this)) return

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// Units underlying the schemas of the requested type; a unit appears once per schema holding it
def unitesInSchema = []
analecCorpus.getSchemas(schema_type).each { schema ->
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
}

// Any unit counted more than once is shared by several schemas
def counts = unitesInSchema.countBy { it }
counts.findAll { unit, count -> count > 1 }.keySet().each { c ->
	println "ERROR UNIT IN MULTIPLE SCHEMA["+c.getDeb()+", "+c.getFin()+"]="+c.getProps()+" in "+c.getSchemas().collect { it.getProps() }
}

// Number of distinct units per unit type
def map = new HashSet(unitesInSchema).countBy { it.getType() }

// A one-parameter closure given to Map.sort receives a Map.Entry: order by its value (the count)
println "Unites types: "+map.sort { it.value }
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/NumberOfSchemaMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.macro.urs.AnalecUtils |
|
15 |
|
|
16 |
// Counts the schemas of the selected corpus that match the schema URSQL and the minimum size.
// The script result is a map with the count ("result") and the selected schemas ("data").

if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return

// Turn the debug label chosen in the dialog into the numeric level passed to AnalecUtils
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

// check Schema parameters
def schemaQueryOk = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
if (!schemaQueryOk) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return
}

// check Unit parameters
def unitQueryOk = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
if (!unitQueryOk) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return
}

// 999999 is passed as the upper size bound: this macro exposes no maximum size parameter
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999)
int nSchemas = schemas.size()

println "Nombre de chaînes de référence d'un texte : $nSchemas"

return ["result":nSchemas, "data":schemas]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/ReferentialDensityMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.rcp.swt.widget.parameters.* |
|
11 |
import org.txm.annotation.urs.* |
|
12 |
import visuAnalec.elements.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.txm.macro.urs.AnalecUtils |
|
15 |
|
|
16 |
// Computes the referential density of the selected corpus: number of selected units
// divided by the number of words. The script result is a map with the coefficient
// ("result") and the two counts ("data").

if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return
}

@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
limit_cql
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
strict_inclusion
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
limit_distance

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return

// Turn the debug label chosen in the dialog into the numeric level passed to AnalecUtils
if (debug == "OFF") debug = 0
else if (debug == "ON") debug = 1
else if (debug == "ALL") debug = 2
else if (debug == "REALLY ALL") debug = 3

CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

int nMots = corpus.getSize()

// no schema constraint here: empty schema URSQL and 0 for the schema size and distance arguments
def units = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, "", 0, 0,
		unit_ursql, 0, limit_cql, strict_inclusion, limit_distance)

int nUnites = units.size()

// An empty corpus would make the division below throw an ArithmeticException
if (nMots == 0) {
	println "** Referential density cannot be computed: the corpus contains no word."
	return ["result":0, "data":["nUnites":nUnites, "nMots":nMots]]
}

coef = (nUnites /nMots)
println "Densité référentielle : nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
if (nUnites >= nMots) {
	println "WARNING: possible encoding error. Number of units ($nUnites) is greater than number of words ($nMots)"
}
return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/StabilityScoreMacro.groovy (revision 2085) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
// STANDARD DECLARATIONS |
|
6 |
package org.txm.macro.urs.exploit |
|
7 |
|
|
8 |
import org.kohsuke.args4j.* |
|
9 |
import groovy.transform.Field |
|
10 |
import org.txm.* |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.annotation.urs.* |
|
13 |
import org.txm.searchengine.cqp.corpus.* |
|
14 |
import org.apache.commons.lang.StringUtils; |
|
15 |
import org.txm.macro.urs.AnalecUtils |
|
16 |
import visuAnalec.elements.* |
|
17 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
18 |
|
|
19 |
if (!(corpusViewSelection instanceof CQPCorpus)) { |
|
20 |
println "Corpora selection is not a Corpus" |
|
21 |
return; |
|
22 |
} |
|
23 |
|
|
24 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
|
25 |
String schema_ursql |
|
26 |
|
|
27 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
|
28 |
int minimum_schema_size |
|
29 |
|
|
30 |
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF") |
|
31 |
String schema_display_property_name |
|
32 |
|
|
33 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
|
34 |
String unit_ursql |
|
35 |
|
|
36 |
@Field @Option(name="word_property", usage="", widget="String", required=false, def="word") |
|
37 |
String word_property |
|
38 |
|
|
39 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
|
40 |
debug |
|
41 |
|
|
42 |
if (!ParametersDialog.open(this)) return; |
|
43 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
|
44 |
|
|
45 |
|
|
46 |
def corpus = corpusViewSelection |
|
47 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
48 |
|
|
49 |
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) { |
|
50 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus." |
|
51 |
return; |
|
52 |
} |
|
53 |
|
|
54 |
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) { |
|
55 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus." |
|
56 |
return; |
|
57 |
} |
|
58 |
|
|
59 |
def CQI = CQPSearchEngine.getCqiClient() |
|
60 |
|
|
61 |
def prop = corpus.getProperty(word_property) |
|
62 |
|
|
63 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999); |
|
64 |
allFormesSet = new HashSet(); |
|
65 |
nUnitesGrandTotal = 0; |
|
66 |
def coefs = [] |
|
67 |
int n = 1 |
|
68 |
for (def schema : schemas) { |
|
69 |
def formesSet = new HashSet(); // contient toutes les formes du CR courant |
|
70 |
nUnitesTotal = 0; |
|
71 |
|
|
72 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
73 |
|
|
74 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
75 |
def nUnites = units.size() |
|
76 |
for (def unit : units) { |
|
77 |
|
|
78 |
String forme = null; |
|
79 |
if (prop == null) { // word_property is the analec unit property to use |
|
80 |
forme = unit.getProp(word_property) |
Formats disponibles : Unified diff