Révision 3224
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsIndexMacro.groovy (revision 3224) | ||
---|---|---|
50 | 50 |
int maximum_schema_size |
51 | 51 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
52 | 52 |
String unit_ursql |
53 |
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=false, def="CATEGORIE") |
|
54 |
String unit_property_display
|
|
53 |
@Field @Option(name="unit_property_display_list", usage="Unit property to count", widget="String", required=false, def="CATEGORIE")
|
|
54 |
def unit_property_display_list
|
|
55 | 55 |
@Field @Option(name="word_property_display", usage="Word property to display instead of the unit property", widget="String", required=false, def="") |
56 | 56 |
String word_property_display |
57 | 57 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=false, def="0") |
... | ... | |
74 | 74 |
output_lexicaltable |
75 | 75 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=false, def="OFF") |
76 | 76 |
debug |
77 |
|
|
77 | 78 |
if (!ParametersDialog.open(this)) return |
78 | 79 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
79 | 80 |
|
81 |
unit_property_display_list = unit_property_display_list.split(",") as List |
|
82 |
unit_property_display_list.removeAll("") |
|
80 | 83 |
|
81 |
if (word_property_display != null && word_property_display.length() > 0 && unit_property_display != null && unit_property_display.length() > 0) {
|
|
82 |
println "Warning: both unit_property_display=$unit_property_display and word_property_display=$word_property_display are set. The index will be computed with the $word_property_display CQP property."
|
|
84 |
if (word_property_display != null && word_property_display.length() > 0 && unit_property_display_list != null && unit_property_display_list.size() > 0) {
|
|
85 |
println "Warning: both unit_property_display_list=$unit_property_display_list and word_property_display=$word_property_display are set. The index will be computed with the $word_property_display CQP property."
|
|
83 | 86 |
} |
84 | 87 |
|
85 |
if (word_property_display.length() == 0 && unit_property_display.length() == 0) {
|
|
86 |
println "Error: no analysis property specified in unit_property_display or word_property_display. Aborting." |
|
88 |
if (word_property_display.length() == 0 && unit_property_display_list.size() == 0) {
|
|
89 |
println "Error: no analysis property specified in unit_property_display_list or word_property_display. Aborting."
|
|
87 | 90 |
return |
88 | 91 |
} |
89 | 92 |
|
... | ... | |
107 | 110 |
return; |
108 | 111 |
} |
109 | 112 |
|
110 |
if (unit_property_display.length() > 0) { |
|
113 |
for (def unit_property_display : unit_property_display_list) { |
|
114 |
println "testing '$unit_property_display'" |
|
111 | 115 |
errors = AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], unit_property_display).size() |
112 | 116 |
if (errors > 0) { |
113 | 117 |
println "Error: some Unit types don't contain the $unit_property_display property: $errors" |
... | ... | |
133 | 137 |
counts[s] = counts[s] + 1 |
134 | 138 |
} |
135 | 139 |
} else { // use Analec unit property |
136 |
counts = selectedUnits.countBy { if (it.getProp(unit_property_display) == null) "<null>" else it.getProp(unit_property_display) } |
|
140 |
counts = selectedUnits.countBy { |
|
141 |
def values = [] |
|
142 |
for (def unit_property_display : unit_property_display_list) { |
|
143 |
|
|
144 |
if (it.getProp(unit_property_display) == null) { |
|
145 |
values << "<null>" |
|
146 |
} else { |
|
147 |
values << it.getProp(unit_property_display) |
|
148 |
} |
|
149 |
} |
|
150 |
return values.join(" ") |
|
151 |
} |
|
137 | 152 |
} |
153 |
|
|
138 | 154 |
if (counts.containsKey("")) counts["<empty>"] = counts[""]; |
139 | 155 |
counts.remove("") |
156 |
|
|
140 | 157 |
if (debug) { |
141 | 158 |
println "\n*** Statistics: " |
142 | 159 |
for (def k : counts.keySet()) { |
... | ... | |
166 | 183 |
def title = "${corpus.getMainCorpus()}.${corpusViewSelection} ${unit_ursql}" |
167 | 184 |
title += "[${position_in_matches}]." |
168 | 185 |
if (word_property_display.length() > 0) title += "${word_property_display} frequencies" |
169 |
else if (unit_property_display.length() > 0) title += "${unit_property_display} frequencies"
|
|
186 |
else if (unit_property_display_list.size() > 0) title += "${unit_property_display_list.join(' ')} frequencies"
|
|
170 | 187 |
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) title += "\n(${cql_limit} limits)" |
171 | 188 |
|
172 | 189 |
if (word_property_display.length() > 0) println "Index de la propriété $word_property_display des mots des unités $unit_ursql[$position_in_matches] de ${corpus.getMainCorpus()}.${corpusViewSelection}" |
173 |
else println "Index de la propriété $unit_property_display des unités $unit_ursql[$position_in_matches] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
|
|
174 |
println "$unit_property_display\t"+selection.join("\t")
|
|
190 |
else println "Index de la propriété ${unit_property_display_list.join(' ')} des unités $unit_ursql[$position_in_matches] de ${corpus.getMainCorpus()}.${corpusViewSelection}"
|
|
191 |
println "${unit_property_display_list.join(' ')}\t"+selection.join("\t")
|
|
175 | 192 |
|
176 | 193 |
keys.eachWithIndex { prop_val, i -> |
177 | 194 |
String line = "" |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsListMacro.groovy (revision 3224) | ||
---|---|---|
1 |
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté |
|
1 |
// Copyright © 2016, 2021 ENS de Lyon, CNRS, University of Franche-Comté
|
|
2 | 2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
3 | 3 |
// @author mdecorde |
4 | 4 |
// @author sheiden |
... | ... | |
62 | 62 |
output_mode |
63 | 63 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
64 | 64 |
debug |
65 |
if (!ParametersDialog.open(this)) return; |
|
65 |
|
|
66 |
@Field @Option(name="outputFile", usage="set outputFile to ' ' (space) to write in console", widget="CreateFile", required=false, def=" ") |
|
67 |
File outputFile |
|
68 |
|
|
69 |
if (!ParametersDialog.open(this)) return |
|
70 |
|
|
66 | 71 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
67 | 72 |
|
68 |
|
|
69 | 73 |
def CQI = CQPSearchEngine.getCqiClient() |
70 | 74 |
|
75 |
if (outputFile != null) { |
|
76 |
println "Outputing to $outputFile" |
|
77 |
outStream = new PrintStream(outputFile) |
|
78 |
} else { |
|
79 |
outStream = System.out |
|
80 |
} |
|
81 |
|
|
71 | 82 |
//corpus = corpusViewSelection |
72 | 83 |
def allResults = [:] |
73 | 84 |
for (def corpus : selection) { |
... | ... | |
96 | 107 |
def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ") |
97 | 108 |
|
98 | 109 |
def props = unit.getProps().sort() |
99 |
println sprintf("#%4d, %d-%d, \"%s\" %s", n, unit.getDeb(), unit.getFin(), form, props) |
|
110 |
outStream << sprintf("#%4d, %d-%d, \"%s\" %s", n, unit.getDeb(), unit.getFin(), form, props) |
|
111 |
outStream << "\n" |
|
100 | 112 |
n++ |
101 | 113 |
} |
102 | 114 |
} |
103 | 115 |
} else if (output_mode == "TABULATED") { |
116 |
|
|
104 | 117 |
for (def corpus : allResults.keySet()) { |
105 | 118 |
def word = corpus.getWordProperty() |
106 | 119 |
def selectedUnits = allResults[corpus] |
107 |
println "$corpus units: "+selectedUnits.size() |
|
108 |
def n = 1 |
|
109 |
for (def unit : selectedUnits) { |
|
110 |
int[] pos = null |
|
111 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
112 |
else pos = (unit.getDeb()..unit.getFin()) |
|
113 |
def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ") |
|
114 |
|
|
115 |
def props = unit.getProps().sort() |
|
116 |
println sprintf("#%4d\t%d\t%d\t%s\t%s", n, unit.getDeb(), unit.getFin(), form, props) |
|
117 |
n++ |
|
120 |
|
|
121 |
println ""+selectedUnits.size()+" units in $corpus.\n" |
|
122 |
|
|
123 |
if (selectedUnits.size() > 0) { |
|
124 |
def n = 1 |
|
125 |
|
|
126 |
outStream << "#\tstart\tend\tform" |
|
127 |
selectedUnits[0].getProps().keySet().sort().each { outStream << "\t"+it } |
|
128 |
outStream << "\n" |
|
129 |
|
|
130 |
for (def unit : selectedUnits) { |
|
131 |
int[] pos = null |
|
132 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
133 |
else pos = (unit.getDeb()..unit.getFin()) |
|
134 |
def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ") |
|
135 |
def props = unit.getProps() |
|
136 |
|
|
137 |
outStream << sprintf("%d\t%d\t%d\t%s", n, unit.getDeb(), unit.getFin(), form) |
|
138 |
props.sort()*.value.each { outStream << "\t"+it } |
|
139 |
outStream << "\n" |
|
140 |
|
|
141 |
n++ |
|
142 |
} |
|
118 | 143 |
} |
119 | 144 |
} |
120 | 145 |
} else if (output_mode == "COUNT") { |
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/AnalecUtils.groovy (revision 3224) | ||
---|---|---|
139 | 139 |
for (def k : groupedUnits.keySet()) { |
140 | 140 |
def selectedUnits = AnalecUtils.filterUniteByInclusion(debug, groupedUnits[k], matches, strict_inclusion, position_in_matches) |
141 | 141 |
|
142 |
if (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size ) {
|
|
142 |
if (schema_ursql == null || schema_ursql.length() == 0 || (minimum_schema_size <= selectedUnits.size() && selectedUnits.size() <= maximum_schema_size)) {
|
|
143 | 143 |
allUnits.addAll(selectedUnits) |
144 |
} else { |
|
145 |
|
|
146 | 144 |
} |
147 | 145 |
} |
148 | 146 |
if (debug >= 2) println "selectedUnits=${allUnits.size()}" |
... | ... | |
151 | 149 |
|
152 | 150 |
return allUnits |
153 | 151 |
} |
152 |
|
|
154 | 153 |
/** |
155 | 154 |
* filter groups elements with the elements positions |
156 | 155 |
* |
Formats disponibles : Unified diff