Révision 2105
| tmp/org.txm.backtomedia.feature/build.properties (revision 2105) | ||
|---|---|---|
| 1 | 1 |
#Fri Jul 06 10:25:04 CEST 2018 |
| 2 | 2 |
bin.includes=feature.xml |
| 3 |
qualifier=svn |
|
| tmp/org.txm.concordance.rcp/src/org/txm/concordance/rcp/editors/ConcordanceEditor.java (revision 2105) | ||
|---|---|---|
| 1243 | 1243 |
StatusLine.setMessage(ConcordanceUIMessages.startComputingConcordance); |
| 1244 | 1244 |
|
| 1245 | 1245 |
try {
|
| 1246 |
|
|
| 1247 | 1246 |
QueriesView.refresh(); |
| 1248 | 1247 |
|
| 1249 | 1248 |
this.fillDisplayArea(update); |
| 1250 | 1249 |
|
| 1250 |
boolean b = concordance.getQueryResultParameter() == null; |
|
| 1251 |
this.queryWidget.setEnabled(b); // disable the widget if a QueryResult was provided |
|
| 1252 |
|
|
| 1251 | 1253 |
this.queryWidget.memorize(); |
| 1252 | 1254 |
|
| 1253 | 1255 |
if (update) {
|
| tmp/org.txm.rcp/src/main/java/org/txm/rcp/swt/widget/AssistedChoiceQueryWidget.java (revision 2105) | ||
|---|---|---|
| 141 | 141 |
break; // set the first engine as default engine |
| 142 | 142 |
} |
| 143 | 143 |
} |
| 144 |
|
|
| 145 |
public void setEnabled(boolean b) {
|
|
| 146 |
super.setEnabled(b); |
|
| 147 |
querywidget.setEnabled(b); |
|
| 148 |
} |
|
| 144 | 149 |
|
| 145 | 150 |
public void setSearchEngine(SearchEngine engine) {
|
| 146 | 151 |
if (engineCombo != null) {
|
| tmp/org.txm.treetagger.binaries.feature/build.properties (revision 2105) | ||
|---|---|---|
| 1 | 1 |
bin.includes = feature.xml |
| 2 |
qualifier=svn |
|
| tmp/org.txm.treetagger.files.feature/build.properties (revision 2105) | ||
|---|---|---|
| 1 | 1 |
#Fri Jul 06 10:25:19 CEST 2018 |
| 2 | 2 |
bin.includes=feature.xml |
| 3 |
qualifier=svn |
|
| tmp/org.txm.concordance.core/src/org/txm/concordance/core/functions/Concordance.java (revision 2105) | ||
|---|---|---|
| 178 | 178 |
this.pQueryResult = pQueryResult; |
| 179 | 179 |
} |
| 180 | 180 |
|
| 181 |
public QueryResult getQueryResultParameter() {
|
|
| 182 |
return pQueryResult; |
|
| 183 |
} |
|
| 181 | 184 |
|
| 182 | 185 |
/** |
| 183 | 186 |
* |
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasProgressionMacro.groovy (revision 2105) | ||
|---|---|---|
| 12 | 12 |
import org.kohsuke.args4j.* |
| 13 | 13 |
import org.txm.Toolbox |
| 14 | 14 |
import org.txm.annotation.urs.* |
| 15 |
import org.txm.chartsengine.core.preferences.ChartsEnginePreferences |
|
| 15 | 16 |
import org.txm.chartsengine.r.core.RChartsEngine |
| 16 | 17 |
import org.txm.macro.urs.AnalecUtils |
| 17 | 18 |
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer |
| ... | ... | |
| 26 | 27 |
import visuAnalec.elements.* |
| 27 | 28 |
|
| 28 | 29 |
def scriptName = this.class.getSimpleName() |
| 29 |
def parent |
|
| 30 |
|
|
| 30 | 31 |
def selection = [] |
| 31 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
| 32 |
println "** $scriptName please select a Corpus to run the macro"
|
|
| 33 |
return;
|
|
| 32 |
for (def s : corpusViewSelections) {
|
|
| 33 |
if (s instanceof CQPCorpus) selection << s
|
|
| 34 |
else if (s instanceof Partition) selection.addAll(s.getParts())
|
|
| 34 | 35 |
} |
| 35 |
selection << corpusViewSelection // only one corpus for progression |
|
| 36 |
parent = corpusViewSelection |
|
| 37 | 36 |
|
| 37 |
if (selection.size() == 0) {
|
|
| 38 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
| 39 |
return false |
|
| 40 |
} else {
|
|
| 41 |
for (def c : selection) c.compute(false) |
|
| 42 |
} |
|
| 43 |
|
|
| 38 | 44 |
// BEGINNING OF PARAMETERS |
| 39 |
@Field @Option(name="sep", usage="Schemas and units selection part", widget="Separator", required=true, def="Selection")
|
|
| 45 |
@Field @Option(name="sep", usage="Schemas and units selection part", widget="Separator", required=false, def="Selection")
|
|
| 40 | 46 |
String sep |
| 41 | 47 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
| 42 | 48 |
String schema_ursql |
| ... | ... | |
| 56 | 62 |
boolean strict_inclusion |
| 57 | 63 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0") |
| 58 | 64 |
int position_in_matches |
| 59 |
@Field @Option(name="sep2", usage="Progression command parameters", widget="Separator", required=true, def="Progression")
|
|
| 65 |
@Field @Option(name="sep2", usage="Progression command parameters", widget="Separator", required=false, def="Progression")
|
|
| 60 | 66 |
String sep2 |
| 61 | 67 |
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div") |
| 62 | 68 |
String struct_name |
| ... | ... | |
| 81 | 87 |
|
| 82 | 88 |
def CQI = CQPSearchEngine.getCqiClient() |
| 83 | 89 |
|
| 84 |
def cql_limit_matches = null; |
|
| 85 |
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) {
|
|
| 86 |
def limitssubcorpus = parent.createSubcorpus(cql_limit, parent.getName().toUpperCase()) |
|
| 87 |
cql_limit_matches = limitssubcorpus.getMatches(); |
|
| 88 |
limitssubcorpus.delete(); |
|
| 89 |
} else {
|
|
| 90 |
cql_limit_matches = parent.getMatches() |
|
| 91 |
} |
|
| 92 | 90 |
|
| 91 |
|
|
| 93 | 92 |
def queries = [] |
| 94 | 93 |
def queryResults = [] |
| 95 | 94 |
def informations = [] |
| 96 | 95 |
def styles = [] |
| 97 | 96 |
for (def corpus : selection) {
|
| 98 | 97 |
|
| 98 |
def cql_limit_matches = null; |
|
| 99 |
if (cql_limit != null && !cql_limit.getQueryString().equals("\"\"")) {
|
|
| 100 |
def limitssubcorpus = corpus.createSubcorpus(cql_limit, corpus.getName().toUpperCase()) |
|
| 101 |
cql_limit_matches = limitssubcorpus.getMatches(); |
|
| 102 |
limitssubcorpus.delete(); |
|
| 103 |
} else {
|
|
| 104 |
cql_limit_matches = corpus.getMatches() |
|
| 105 |
} |
|
| 106 |
|
|
| 99 | 107 |
mainCorpus = corpus.getMainCorpus() |
| 100 | 108 |
|
| 101 | 109 |
def word = mainCorpus.getWordProperty() |
| ... | ... | |
| 155 | 163 |
if (allHighlightedUnits != null && allHighlightedUnits.containsKey(schema)) selectedAndHighlightedUnits.addAll(allHighlightedUnits[schema]) |
| 156 | 164 |
|
| 157 | 165 |
if (cql_limit_matches != null) {
|
| 158 |
if (debug) println "corpus matches: "+parent.getMatches()
|
|
| 166 |
if (debug) println "corpus matches: "+corpus.getMatches()
|
|
| 159 | 167 |
if (debug) println "filter cql_limit_matches=${cql_limit_matches} with "+selectedAndHighlightedUnits.size()+" units."
|
| 160 | 168 |
selectedAndHighlightedUnits = AnalecUtils.filterUniteByInclusion(debug, selectedAndHighlightedUnits, cql_limit_matches, strict_inclusion, position_in_matches) |
| 161 | 169 |
if (debug) println "selectedAndHighlightedUnits=${selectedAndHighlightedUnits.size()}"
|
| ... | ... | |
| 192 | 200 |
informations << unitsinformations |
| 193 | 201 |
styles << unitsstyles |
| 194 | 202 |
} |
| 203 |
|
|
| 204 |
try {
|
|
| 205 |
def struct = corpus.getStructuralUnit(struct_name); |
|
| 206 |
def struct_p = null; |
|
| 207 |
if (struct != null) {
|
|
| 208 |
struct_p = struct.getProperty(struct_prop) |
|
| 209 |
} |
|
| 210 |
|
|
| 211 |
Progression progression = new Progression(corpus); |
|
| 212 |
progression.setParameters(queries, |
|
| 213 |
struct, struct_p, ".*", |
|
| 214 |
cummulative, line_width, false, bande_width) |
|
| 215 |
|
|
| 216 |
progression.setQueryResults(queryResults); // new |
|
| 217 |
progression.setParameter(ChartsEnginePreferences.SHOW_LEGEND, show_legend); |
|
| 218 |
progression.setParameter(ChartsEnginePreferences.SHOW_TITLE, show_title); |
|
| 219 |
|
|
| 220 |
progression.compute() |
|
| 221 |
if (!progression.stepStructuralUnits() || monitor.isCanceled()) return |
|
| 222 |
monitor.worked(20) |
|
| 223 |
if (!progression.stepFinalize() || monitor.isCanceled()) return |
|
| 224 |
monitor.worked(20) |
|
| 225 |
|
|
| 226 |
monitor.syncExec(new Runnable() {
|
|
| 227 |
@Override |
|
| 228 |
public void run() {
|
|
| 229 |
try {
|
|
| 230 |
//def charteditorpart = org.txm.chartsengine.rcp.SWTChartsComponentsProvider.openEditor(Application.swtComponentProvider.createProgressionChartEditorPart(IImageKeys.getImage(IImageKeys.ACTION_PROGRESSION), progression, progression.isMonochrome(), progression.isMonostyle(), progression.isDoCumulative())) |
|
| 231 |
def charteditorpart = org.txm.chartsengine.rcp.editors.ChartEditor.openEditor(progression); |
|
| 232 |
// JFreeChart chart = charteditorpart.getChart() |
|
| 233 |
// def plot = chart.getXYPlot() |
|
| 234 |
// |
|
| 235 |
// ProgressionItemSelectionRenderer renderer = plot.getRenderer(); |
|
| 236 |
// //renderer.setBaseItemLabelsVisible(true) |
|
| 237 |
// //renderer.setBaseLinesVisible(false) |
|
| 238 |
// //renderer.setBaseSeriesVisible(SchemasProgressionMacro.this.show_lines, true); |
|
| 239 |
// //renderer.setItemLabelsVisible(true) |
|
| 240 |
// //renderer.setLinesVisible(false) |
|
| 241 |
// renderer.setAdditionalLabelInformation(informations) |
|
| 242 |
// renderer.setAdditionalShapeScales(styles) |
|
| 243 |
// chart.getLegend().setVisible(SchemasProgressionMacro.this.show_legend) |
|
| 244 |
// chart.getTitle().setVisible(SchemasProgressionMacro.this.show_title) |
|
| 245 |
|
|
| 246 |
} catch(Exception e) {e.printStackTrace()}
|
|
| 247 |
} |
|
| 248 |
}) |
|
| 249 |
|
|
| 250 |
} catch(Exception e) {
|
|
| 251 |
e.printStackTrace() |
|
| 252 |
return false |
|
| 253 |
} |
|
| 195 | 254 |
} |
| 196 | 255 |
|
| 197 | 256 |
//println ""+queries.size()+" selected schemas: "+queries |
| 198 | 257 |
|
| 199 |
def corpus = parent |
|
| 200 |
try {
|
|
| 201 |
def struct = corpus.getStructuralUnit(struct_name); |
|
| 202 |
def struct_p = null; |
|
| 203 |
if (struct != null) {
|
|
| 204 |
struct_p = struct.getProperty(struct_prop) |
|
| 205 |
} |
|
| 206 |
|
|
| 207 |
Progression progression = new Progression(corpus); |
|
| 208 |
progression.setParameters(queries, |
|
| 209 |
struct, struct_p, ".*", |
|
| 210 |
cummulative, line_width, false, bande_width) |
|
| 211 |
|
|
| 212 |
progression.setQueryResults(queryResults); // new |
|
| 213 |
progression.compute() |
|
| 214 |
if (!progression.stepStructuralUnits() || monitor.isCanceled()) return |
|
| 215 |
monitor.worked(20) |
|
| 216 |
if (!progression.stepFinalize() || monitor.isCanceled()) return |
|
| 217 |
monitor.worked(20) |
|
| 218 |
|
|
| 219 |
monitor.syncExec(new Runnable() {
|
|
| 220 |
@Override |
|
| 221 |
public void run() {
|
|
| 222 |
try {
|
|
| 223 |
def charteditorpart = SWTChartsComponentProvider.openEditor(Application.swtComponentProvider.createProgressionChartEditorPart(IImageKeys.getImage(IImageKeys.ACTION_PROGRESSION), progression, progression.isMonochrome(), progression.isMonostyle(), progression.isDoCumulative())) |
|
| 224 |
JFreeChart chart = charteditorpart.getChart() |
|
| 225 |
def plot = chart.getXYPlot() |
|
| 226 |
|
|
| 227 |
ProgressionItemSelectionRenderer renderer = plot.getRenderer(); |
|
| 228 |
//renderer.setBaseItemLabelsVisible(true) |
|
| 229 |
//renderer.setBaseLinesVisible(false) |
|
| 230 |
//renderer.setBaseSeriesVisible(SchemasProgressionMacro.this.show_lines, true); |
|
| 231 |
//renderer.setItemLabelsVisible(true) |
|
| 232 |
//renderer.setLinesVisible(false) |
|
| 233 |
renderer.setAdditionalLabelInformation(informations) |
|
| 234 |
renderer.setAdditionalShapeScales(styles) |
|
| 235 |
chart.getLegend().setVisible(SchemasProgressionMacro.this.show_legend) |
|
| 236 |
chart.getTitle().setVisible(SchemasProgressionMacro.this.show_title) |
|
| 237 |
|
|
| 238 |
} catch(Exception e) {e.printStackTrace()}
|
|
| 239 |
} |
|
| 240 |
}) |
|
| 241 |
|
|
| 242 |
} catch(Exception e) {
|
|
| 243 |
e.printStackTrace() |
|
| 244 |
return false |
|
| 245 |
} |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsListMacro.groovy (revision 2105) | ||
|---|---|---|
| 37 | 37 |
if (selection.size() == 0) {
|
| 38 | 38 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
| 39 | 39 |
return false |
| 40 |
} else {
|
|
| 41 |
for (def c : selection) c.compute(false) |
|
| 40 | 42 |
} |
| 41 | 43 |
|
| 42 | 44 |
// BEGINNING OF PARAMETERS |
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasListMacro.groovy (revision 2105) | ||
|---|---|---|
| 18 | 18 |
import org.apache.commons.lang.StringUtils |
| 19 | 19 |
import org.txm.searchengine.cqp.CQPSearchEngine |
| 20 | 20 |
|
| 21 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
| 22 |
println "Corpus view selection is not a Corpus" |
|
| 23 |
return; |
|
| 21 |
def selection = [] |
|
| 22 |
for (def s : corpusViewSelections) {
|
|
| 23 |
if (s instanceof CQPCorpus) selection << s |
|
| 24 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
| 24 | 25 |
} |
| 25 | 26 |
|
| 27 |
if (selection.size() == 0) {
|
|
| 28 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
| 29 |
return false |
|
| 30 |
} else {
|
|
| 31 |
for (def c : selection) c.compute(false) |
|
| 32 |
} |
|
| 33 |
|
|
| 26 | 34 |
// BEGINNING OF PARAMETERS |
| 27 | 35 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
| 28 | 36 |
String schema_ursql |
| ... | ... | |
| 50 | 58 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
| 51 | 59 |
|
| 52 | 60 |
|
| 53 |
CQPCorpus corpus = corpusViewSelection |
|
| 54 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 55 |
|
|
| 56 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 57 |
if (errors.size() > 0) {
|
|
| 58 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
| 59 |
return; |
|
| 60 |
} |
|
| 61 |
|
|
| 62 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
| 63 |
if (errors.size() > 0) {
|
|
| 64 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
| 65 |
return; |
|
| 66 |
} |
|
| 67 |
|
|
| 68 |
def CQI = CQPSearchEngine.getCqiClient() |
|
| 69 |
|
|
| 70 |
if (buildCQL) {
|
|
| 71 |
word_prop = corpus.getProperty("id")
|
|
| 72 |
} else {
|
|
| 73 |
word_prop = corpus.getProperty(word_property) |
|
| 74 |
} |
|
| 75 |
|
|
| 76 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
| 77 |
schemas.sort() {it.getProps()}
|
|
| 78 |
def nSchemas = 0 |
|
| 79 |
|
|
| 80 |
def lens = [:] |
|
| 81 |
for (def schema : schemas) {
|
|
| 82 |
|
|
| 83 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
| 84 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
| 61 |
for (def corpus : selection) {
|
|
| 62 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 85 | 63 |
|
| 86 |
print schema.getProps().toString()+ ": " |
|
| 87 |
def first = true |
|
| 88 |
for (def unit : units) {
|
|
| 89 |
|
|
| 90 |
String forme = null; |
|
| 91 |
|
|
| 92 |
if (buildCQL) {
|
|
| 93 |
int[] pos = null |
|
| 94 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
| 95 |
else pos = (unit.getDeb()..unit.getFin()) |
|
| 96 |
def first2= true |
|
| 97 |
q = "" |
|
| 98 |
pos.each {
|
|
| 99 |
if (first2) { first2 = false } else { q = q+" " }
|
|
| 100 |
int[] pos2 = [it] |
|
| 101 |
q = q+"["+word_prop+"=\""+CQI.cpos2Str(word_prop.getQualifiedName(), pos2)[0]+"\"]" |
|
| 102 |
} |
|
| 103 |
if (first) { first = false } else { print "|" }
|
|
| 104 |
print "("+q+")"
|
|
| 105 |
} else {
|
|
| 106 |
if (word_prop == null) { // word_property is the analec unit property to use
|
|
| 107 |
forme = unit.getProp(word_property) |
|
| 108 |
} else {
|
|
| 64 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 65 |
if (errors.size() > 0) {
|
|
| 66 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
| 67 |
return; |
|
| 68 |
} |
|
| 69 |
|
|
| 70 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
| 71 |
if (errors.size() > 0) {
|
|
| 72 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
| 73 |
return; |
|
| 74 |
} |
|
| 75 |
|
|
| 76 |
def CQI = CQPSearchEngine.getCqiClient() |
|
| 77 |
|
|
| 78 |
if (buildCQL) {
|
|
| 79 |
word_prop = corpus.getProperty("id")
|
|
| 80 |
} else {
|
|
| 81 |
word_prop = corpus.getProperty(word_property) |
|
| 82 |
} |
|
| 83 |
|
|
| 84 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
| 85 |
schemas.sort() {it.getProps()}
|
|
| 86 |
def nSchemas = 0 |
|
| 87 |
|
|
| 88 |
def lens = [:] |
|
| 89 |
|
|
| 90 |
println "$corpus schemas (${schemas.size()}):"
|
|
| 91 |
for (def schema : schemas) {
|
|
| 92 |
|
|
| 93 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
| 94 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
| 95 |
|
|
| 96 |
print schema.getProps().toString()+ ": " |
|
| 97 |
def first = true |
|
| 98 |
for (def unit : units) {
|
|
| 99 |
|
|
| 100 |
String forme = null; |
|
| 101 |
|
|
| 102 |
if (buildCQL) {
|
|
| 109 | 103 |
int[] pos = null |
| 110 | 104 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
| 111 | 105 |
else pos = (unit.getDeb()..unit.getFin()) |
| 112 |
|
|
| 113 |
forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough |
|
| 106 |
def first2= true |
|
| 107 |
q = "" |
|
| 108 |
pos.each {
|
|
| 109 |
if (first2) { first2 = false } else { q = q+" " }
|
|
| 110 |
int[] pos2 = [it] |
|
| 111 |
q = q+"["+word_prop+"=\""+CQI.cpos2Str(word_prop.getQualifiedName(), pos2)[0]+"\"]" |
|
| 112 |
} |
|
| 113 |
if (first) { first = false } else { print "|" }
|
|
| 114 |
print "("+q+")"
|
|
| 115 |
} else {
|
|
| 116 |
if (word_prop == null) { // word_property is the analec unit property to use
|
|
| 117 |
forme = unit.getProp(word_property) |
|
| 118 |
} else {
|
|
| 119 |
int[] pos = null |
|
| 120 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
| 121 |
else pos = (unit.getDeb()..unit.getFin()) |
|
| 122 |
|
|
| 123 |
forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough |
|
| 124 |
} |
|
| 125 |
|
|
| 126 |
if (first) { first = false } else { print separator }
|
|
| 127 |
print forme |
|
| 114 | 128 |
} |
| 115 |
|
|
| 116 |
if (first) { first = false } else { print separator }
|
|
| 117 |
print forme |
|
| 118 | 129 |
} |
| 130 |
println "" |
|
| 131 |
|
|
| 132 |
nSchemas++ |
|
| 119 | 133 |
} |
| 120 |
println ""
|
|
| 134 |
}
|
|
| 121 | 135 |
|
| 122 |
nSchemas++ |
|
| 123 |
} |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsInterdistanceMacro.groovy (revision 2105) | ||
|---|---|---|
| 15 | 15 |
import org.txm.searchengine.cqp.corpus.* |
| 16 | 16 |
import org.apache.commons.lang.StringUtils; |
| 17 | 17 |
|
| 18 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
| 19 |
println "Corpora selection is not a Corpus" |
|
| 20 |
return; |
|
| 18 |
def selection = [] |
|
| 19 |
for (def s : corpusViewSelections) {
|
|
| 20 |
if (s instanceof CQPCorpus) selection << s |
|
| 21 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
| 21 | 22 |
} |
| 22 | 23 |
|
| 24 |
if (selection.size() == 0) {
|
|
| 25 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
| 26 |
return false |
|
| 27 |
} else {
|
|
| 28 |
for (def c : selection) c.compute(false) |
|
| 29 |
} |
|
| 30 |
|
|
| 23 | 31 |
// BEGINNING OF PARAMETERS |
| 24 | 32 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
| 25 | 33 |
String schema_ursql |
| ... | ... | |
| 40 | 48 |
if (!ParametersDialog.open(this)) return; |
| 41 | 49 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
| 42 | 50 |
|
| 43 |
CQPCorpus corpus = corpusViewSelection |
|
| 44 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 45 |
|
|
| 46 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 47 |
if (errors.size() > 0) {
|
|
| 48 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
| 49 |
return; |
|
| 50 |
} |
|
| 51 |
|
|
| 52 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
| 53 |
if (errors.size() > 0) {
|
|
| 54 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
| 55 |
return; |
|
| 56 |
} |
|
| 57 |
|
|
| 58 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
| 59 |
def distances = []; |
|
| 60 |
def nDistances = 0 |
|
| 61 |
def cadences = []; |
|
| 62 |
for (def schema : schemas) {
|
|
| 51 |
for (def corpus : selection) {
|
|
| 52 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 63 | 53 |
|
| 64 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
| 65 |
|
|
| 66 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
| 54 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 55 |
if (errors.size() > 0) {
|
|
| 56 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
| 57 |
return; |
|
| 58 |
} |
|
| 67 | 59 |
|
| 68 |
Collections.sort(units) |
|
| 60 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
| 61 |
if (errors.size() > 0) {
|
|
| 62 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
| 63 |
return; |
|
| 64 |
} |
|
| 65 |
|
|
| 66 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
| 67 |
def distances = []; |
|
| 68 |
def nDistances = 0 |
|
| 69 |
def cadences = []; |
|
| 70 |
for (def schema : schemas) {
|
|
| 69 | 71 |
|
| 70 |
for (int i = 0 ; i < units.size() ; i++) {
|
|
| 71 |
int d1 = 0; |
|
| 72 |
int d2 = 0; |
|
| 73 |
if (i < units.size()-1) d1 = units[i+1].getDeb() - units[i].getFin(); |
|
| 74 |
if (d1 < 0) {
|
|
| 75 |
//println "D1 "+units[i+1].getDeb()+" - "+units[i].getFin()+" = "+d1 |
|
| 76 |
d1 = 0; // the first unit pass the next one ? |
|
| 72 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
| 73 |
|
|
| 74 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
| 75 |
|
|
| 76 |
Collections.sort(units) |
|
| 77 |
|
|
| 78 |
for (int i = 0 ; i < units.size() ; i++) {
|
|
| 79 |
int d1 = 0; |
|
| 80 |
int d2 = 0; |
|
| 81 |
if (i < units.size()-1) d1 = units[i+1].getDeb() - units[i].getFin(); |
|
| 82 |
if (d1 < 0) {
|
|
| 83 |
//println "D1 "+units[i+1].getDeb()+" - "+units[i].getFin()+" = "+d1 |
|
| 84 |
d1 = 0; // the first unit pass the next one ? |
|
| 85 |
} |
|
| 86 |
if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin(); |
|
| 87 |
if (d2 < 0) {
|
|
| 88 |
//println "D2 "+units[i].getDeb()+" - "+units[i-1].getFin()+" = "+d2 |
|
| 89 |
d2 = 0; // the first unit pass the next one ? |
|
| 90 |
} |
|
| 91 |
distances << d1 |
|
| 92 |
|
|
| 93 |
if (d1 < d2) cadences << d1 else cadences << d2 |
|
| 94 |
|
|
| 95 |
nDistances++ |
|
| 77 | 96 |
} |
| 78 |
if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin(); |
|
| 79 |
if (d2 < 0) {
|
|
| 80 |
//println "D2 "+units[i].getDeb()+" - "+units[i-1].getFin()+" = "+d2 |
|
| 81 |
d2 = 0; // the first unit pass the next one ? |
|
| 82 |
} |
|
| 83 |
distances << d1 |
|
| 84 |
|
|
| 85 |
if (d1 < d2) cadences << d1 else cadences << d2 |
|
| 86 |
|
|
| 87 |
nDistances++ |
|
| 88 | 97 |
} |
| 98 |
distances = distances.sort() |
|
| 99 |
cadences = cadences.sort() |
|
| 100 |
|
|
| 101 |
int distances_total = distances.sum() |
|
| 102 |
int cadences_total = cadences.sum() |
|
| 103 |
coef = (distances_total / nDistances) |
|
| 104 |
cadence = (cadences_total / nDistances) |
|
| 105 |
|
|
| 106 |
println "$corpus distances:" |
|
| 107 |
//println "distances $distances" |
|
| 108 |
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef" |
|
| 109 |
println "distance medianne inter-mayonnaise : "+distances[(int)(distances.size() / 2)] |
|
| 110 |
println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)] |
|
| 111 |
//println "cadences $cadences" |
|
| 112 |
println "cadence moyenne : $cadences_total / $nDistances = $cadence" |
|
| 113 |
println "cadence medianne : "+cadences[(int)(cadences.size() / 2)] |
|
| 114 |
println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)] |
|
| 115 |
|
|
| 116 |
//return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]] |
|
| 89 | 117 |
} |
| 90 |
distances = distances.sort() |
|
| 91 |
cadences = cadences.sort() |
|
| 92 |
|
|
| 93 |
int distances_total = distances.sum() |
|
| 94 |
int cadences_total = cadences.sum() |
|
| 95 |
coef = (distances_total / nDistances) |
|
| 96 |
cadence = (cadences_total / nDistances) |
|
| 97 |
println "distances $distances" |
|
| 98 |
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef" |
|
| 99 |
println "distance medianne inter-mayonnaise : "+distances[(int)(distances.size() / 2)] |
|
| 100 |
println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)] |
|
| 101 |
println "cadences $cadences" |
|
| 102 |
println "cadence moyenne : $cadences_total / $nDistances = $cadence" |
|
| 103 |
println "cadence medianne : "+cadences[(int)(cadences.size() / 2)] |
|
| 104 |
println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)] |
|
| 105 |
|
|
| 106 |
return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]] |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsSummaryMacro.groovy (revision 2105) | ||
|---|---|---|
| 32 | 32 |
if (selection.size() == 0) {
|
| 33 | 33 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
| 34 | 34 |
return false |
| 35 |
} else {
|
|
| 36 |
for (def c : selection) c.compute(false) |
|
| 35 | 37 |
} |
| 36 | 38 |
|
| 37 | 39 |
// BEGINNING OF PARAMETERS |
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasSummaryMacro.groovy (revision 2105) | ||
|---|---|---|
| 32 | 32 |
if (selection.size() == 0) {
|
| 33 | 33 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
| 34 | 34 |
return false |
| 35 |
} else {
|
|
| 36 |
for (def c : selection) c.compute(false) |
|
| 35 | 37 |
} |
| 36 | 38 |
|
| 37 | 39 |
// BEGINNING OF PARAMETERS |
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsReferentialDensityMacro.groovy (revision 2105) | ||
|---|---|---|
| 13 | 13 |
import org.txm.searchengine.cqp.corpus.* |
| 14 | 14 |
import org.txm.macro.urs.AnalecUtils |
| 15 | 15 |
|
| 16 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
| 17 |
println "Corpora selection is not a Corpus" |
|
| 18 |
return; |
|
| 16 |
def selection = [] |
|
| 17 |
for (def s : corpusViewSelections) {
|
|
| 18 |
if (s instanceof CQPCorpus) selection << s |
|
| 19 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
| 19 | 20 |
} |
| 20 | 21 |
|
| 21 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE") |
|
| 22 |
if (selection.size() == 0) {
|
|
| 23 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
| 24 |
return false |
|
| 25 |
} else {
|
|
| 26 |
for (def c : selection) c.compute(false) |
|
| 27 |
} |
|
| 28 |
|
|
| 29 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE") |
|
| 22 | 30 |
String schema_ursql |
| 23 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
|
|
| 31 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=false, def="3")
|
|
| 24 | 32 |
int minimum_schema_size |
| 25 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
|
|
| 33 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=false, def="9999999")
|
|
| 26 | 34 |
int maximum_schema_size |
| 27 | 35 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
| 28 | 36 |
String unit_ursql |
| 29 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
| 37 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=false, def="0")
|
|
| 30 | 38 |
int position_in_schema |
| 31 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
| 39 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
| 32 | 40 |
cql_limit |
| 33 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
|
|
| 41 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true")
|
|
| 34 | 42 |
strict_inclusion |
| 35 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
| 43 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=false, def="0")
|
|
| 36 | 44 |
position_in_matches |
| 37 | 45 |
|
| 38 | 46 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
| ... | ... | |
| 42 | 50 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
| 43 | 51 |
|
| 44 | 52 |
|
| 45 |
CQPCorpus corpus = corpusViewSelection
|
|
| 53 |
for (def corpus : selection) {
|
|
| 46 | 54 |
def analecCorpus = URSCorpora.getCorpus(corpus); |
| 47 | 55 |
|
| 48 | 56 |
int nMots = corpus.getSize(); |
| ... | ... | |
| 53 | 61 |
int nUnites = units.size(); |
| 54 | 62 |
|
| 55 | 63 |
coef = (nUnites /nMots) |
| 56 |
println "Densité référentielle : nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
|
|
| 64 |
println "$corpus referential density: nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
|
|
| 57 | 65 |
if (nUnites >= nMots) {
|
| 58 |
println "WARNING: possible encoding error. Number of units ($nUnites) is greater than number of words ($nMots)" |
|
| 66 |
println "WARNING: possible encoding error in $corpus. Number of units ($nUnites) is greater than number of words ($nMots)"
|
|
| 59 | 67 |
} |
| 60 |
return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]] |
|
| 68 |
//return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]] |
|
| 69 |
} |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemaLengthsMacro.groovy (revision 2105) | ||
|---|---|---|
| 17 | 17 |
import org.txm.rcp.commands.* |
| 18 | 18 |
import org.txm.statsengine.r.core.RWorkspace |
| 19 | 19 |
|
| 20 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
| 21 |
println "Corpora selection is not a Corpus" |
|
| 22 |
return; |
|
| 20 |
def selection = [] |
|
| 21 |
for (def s : corpusViewSelections) {
|
|
| 22 |
if (s instanceof CQPCorpus) selection << s |
|
| 23 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
| 23 | 24 |
} |
| 24 | 25 |
|
| 26 |
if (selection.size() == 0) {
|
|
| 27 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
| 28 |
return false |
|
| 29 |
} else {
|
|
| 30 |
for (def c : selection) c.compute(false) |
|
| 31 |
} |
|
| 32 |
|
|
| 25 | 33 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
| 26 |
String schema_ursql |
|
| 34 |
String schema_ursql
|
|
| 27 | 35 |
|
| 28 | 36 |
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3") |
| 29 |
int minimum_schema_size |
|
| 37 |
int minimum_schema_size
|
|
| 30 | 38 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999") |
| 31 |
int maximum_schema_size |
|
| 39 |
int maximum_schema_size
|
|
| 32 | 40 |
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF") |
| 33 |
String schema_property_display |
|
| 41 |
String schema_property_display
|
|
| 34 | 42 |
|
| 35 | 43 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION") |
| 36 |
String unit_ursql |
|
| 37 |
|
|
| 44 |
String unit_ursql |
|
| 45 |
//@Field @Option(name="output_graph", usage="Show chart", widget="Boolean", required=true, def="false") |
|
| 46 |
output_graph = false |
|
| 38 | 47 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF") |
| 39 |
debug |
|
| 48 |
debug
|
|
| 40 | 49 |
|
| 41 | 50 |
if (!ParametersDialog.open(this)) return; |
| 42 | 51 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
| 43 | 52 |
|
| 44 |
def corpus = corpusViewSelection
|
|
| 45 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 53 |
for (def corpus : selection) {
|
|
| 54 |
def analecCorpus = URSCorpora.getCorpus(corpus)
|
|
| 46 | 55 |
|
| 47 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 48 |
if (errors.size() > 0) {
|
|
| 49 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
| 50 |
return; |
|
| 51 |
} |
|
| 56 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
|
|
| 57 |
if (errors.size() > 0) {
|
|
| 58 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
|
|
| 59 |
return;
|
|
| 60 |
}
|
|
| 52 | 61 |
|
| 53 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
| 54 |
if (errors.size() > 0) {
|
|
| 55 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
| 56 |
return; |
|
| 57 |
} |
|
| 62 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
|
|
| 63 |
if (errors.size() > 0) {
|
|
| 64 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
|
|
| 65 |
return;
|
|
| 66 |
}
|
|
| 58 | 67 |
|
| 59 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
| 68 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
|
|
| 60 | 69 |
|
| 61 |
int nSchemas = 0; |
|
| 70 |
int nSchemas = 0;
|
|
| 62 | 71 |
|
| 63 |
def lens = [:] |
|
| 64 |
def lensnames = [:] |
|
| 65 |
for (def schema : schemas) {
|
|
| 72 |
def lens = [:]
|
|
| 73 |
def lensnames = [:]
|
|
| 74 |
for (def schema : schemas) {
|
|
| 66 | 75 |
|
| 67 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
| 68 |
|
|
| 69 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
| 70 |
|
|
| 71 |
int nUnites = units.size(); |
|
| 72 |
|
|
| 73 |
if (!lens.containsKey(nUnites)) {
|
|
| 74 |
lens[nUnites] = 0; |
|
| 75 |
lensnames[nUnites] = []; |
|
| 76 |
} |
|
| 77 |
|
|
| 78 |
lens[nUnites] = lens[nUnites] + 1; |
|
| 79 |
lensnames[nUnites] << schema.getProp(schema_property_display) |
|
| 80 |
nSchemas++; |
|
| 81 |
} |
|
| 76 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
| 82 | 77 |
|
| 83 |
//println "nSchemas=$nSchemas" |
|
| 84 |
def freqs = lens.keySet(); |
|
| 85 |
freqs.sort(); |
|
| 86 |
int t = 0; |
|
| 87 |
int n = 0; |
|
| 88 |
//println "Fréquences ("+freqs.size()+")"
|
|
| 89 |
for (def f : freqs) {
|
|
| 90 |
t += f * lens[f] |
|
| 91 |
n += lens[f] |
|
| 92 |
} |
|
| 78 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
| 93 | 79 |
|
| 94 |
coef = (t/n) |
|
| 95 |
def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
|
|
| 96 |
def flens = [] |
|
| 97 |
slens.each { key, value -> value.times { flens << key } }
|
|
| 98 |
def nbins = flens.size()*2 |
|
| 80 |
int nUnites = units.size(); |
|
| 99 | 81 |
|
| 100 |
def cfreq = 0
|
|
| 101 |
println "Longueur moyenne des chaînes de référence : $t/$n = "+coef
|
|
| 102 |
println "Index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
|
|
| 103 |
slens.each { println it.key+" "+it.value+" "+(cfreq+=it.value)+" "+lensnames[it.key] }
|
|
| 82 |
if (!lens.containsKey(nUnites)) {
|
|
| 83 |
lens[nUnites] = 0;
|
|
| 84 |
lensnames[nUnites] = [];
|
|
| 85 |
}
|
|
| 104 | 86 |
|
| 105 |
def slens2 = slens.sort { a, b -> -a.key <=> -b.key }
|
|
| 87 |
lens[nUnites] = lens[nUnites] + 1; |
|
| 88 |
lensnames[nUnites] << schema.getProp(schema_property_display) |
|
| 89 |
nSchemas++; |
|
| 90 |
} |
|
| 106 | 91 |
|
| 107 |
def r = RWorkspace.getRWorkspaceInstance() |
|
| 92 |
//println "nSchemas=$nSchemas" |
|
| 93 |
def freqs = lens.keySet(); |
|
| 94 |
freqs.sort(); |
|
| 95 |
int t = 0; |
|
| 96 |
int n = 0; |
|
| 97 |
//println "Fréquences ("+freqs.size()+")"
|
|
| 98 |
for (def f : freqs) {
|
|
| 99 |
t += f * lens[f] |
|
| 100 |
n += lens[f] |
|
| 101 |
} |
|
| 108 | 102 |
|
| 109 |
r.addVectorToWorkspace("len", slens2.keySet() as int[])
|
|
| 110 |
r.addVectorToWorkspace("freq", slens2.values() as int[])
|
|
| 111 |
r.addVectorToWorkspace("flen", flens as int[])
|
|
| 103 |
coef = (t/n) |
|
| 104 |
def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
|
|
| 105 |
def flens = [] |
|
| 106 |
slens.each { key, value -> value.times { flens << key } }
|
|
| 107 |
def nbins = flens.size()*2 |
|
| 112 | 108 |
|
| 113 |
def corpusName = corpus.getID() |
|
| 109 |
def cfreq = 0 |
|
| 110 |
println "Longueur moyenne des chaînes de référence : $t/$n = "+coef |
|
| 111 |
println "Index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq" |
|
| 112 |
slens.each { println it.key+" "+it.value+" "+(cfreq+=it.value)+" "+lensnames[it.key] }
|
|
| 114 | 113 |
|
| 115 |
def PNGFile = File.createTempFile("txm", ".png", new File(Toolbox.getTxmHomePath(), "results"))
|
|
| 116 |
def PNGFilePath = PNGFile.getAbsolutePath()
|
|
| 117 |
println "PNG file: "+PNGFilePath
|
|
| 114 |
def slens2 = slens.sort { a, b -> -a.key <=> -b.key }
|
|
| 115 |
if (output_graph) {
|
|
| 116 |
def r = RWorkspace.getRWorkspaceInstance()
|
|
| 118 | 117 |
|
| 119 |
def SVGFile = File.createTempFile("txm", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
|
|
| 120 |
def SVGFilePath = SVGFile.getAbsolutePath()
|
|
| 121 |
println "SVG file: "+SVGFilePath
|
|
| 118 |
r.addVectorToWorkspace("len", slens2.keySet() as int[])
|
|
| 119 |
r.addVectorToWorkspace("freq", slens2.values() as int[])
|
|
| 120 |
r.addVectorToWorkspace("flen", flens as int[])
|
|
| 122 | 121 |
|
| 123 |
/// BEGINNING OF R SCRIPT |
|
| 124 |
def script =""" |
|
| 122 |
def corpusName = corpus.getName() |
|
| 123 |
println "corpusName=$corpusName" |
|
| 124 |
def PNGFile = File.createTempFile("txm_"+corpus.getID()+"_", ".png", new File(Toolbox.getTxmHomePath(), "results"))
|
|
| 125 |
def PNGFilePath = PNGFile.getAbsolutePath() |
|
| 126 |
println "PNG file: "+PNGFilePath |
|
| 127 |
|
|
| 128 |
def SVGFile = File.createTempFile("txm_"+corpus.getID()+"_", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
|
|
| 129 |
def SVGFilePath = SVGFile.getAbsolutePath() |
|
| 130 |
println "SVG file: "+SVGFilePath |
|
| 131 |
|
|
| 132 |
/// BEGINNING OF R SCRIPT |
|
| 133 |
def script =""" |
|
| 125 | 134 |
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=$nbins, main="$corpusName Longueur des chaînes ($nbins bins)") |
| 126 | 135 |
axis(side=1, at=len) |
| 127 | 136 |
dev.off() |
| 128 | 137 |
""" |
| 129 |
/// END OF R SCRIPT |
|
| 138 |
/// END OF R SCRIPT
|
|
| 130 | 139 |
|
| 131 |
// execute R script |
|
| 132 |
r.plot(PNGFile, "png(file = \"${PNGFilePath}\"); "+script)
|
|
| 133 |
r.plot(SVGFile, "svglite(file = \"${SVGFilePath}\"); "+script)
|
|
| 140 |
// execute R script
|
|
| 141 |
r.plot(PNGFile, "png(file = \"${PNGFilePath}\"); "+script)
|
|
| 142 |
r.plot(SVGFile, "svg(file = \"${SVGFilePath}\"); "+script)
|
|
| 134 | 143 |
|
| 135 |
//display the SVG results graphic |
|
| 136 |
monitor.syncExec(new Runnable() {
|
|
| 137 |
@Override |
|
| 138 |
public void run() { OpenBrowser.openfile(SVGFilePath, corpusName+" Longueur des chaînes") }
|
|
| 139 |
}) |
|
| 144 |
//display the SVG results graphic |
|
| 145 |
monitor.syncExec(new Runnable() {
|
|
| 146 |
@Override |
|
| 147 |
public void run() { OpenBrowser.openfile(SVGFile.getAbsolutePath(), corpusName+" Longueur des chaînes") }
|
|
| 148 |
}) |
|
| 149 |
} |
|
| 150 |
//return ["result":coef, "data":lens] |
|
| 151 |
} |
|
| 140 | 152 |
|
| 141 |
return ["result":coef, "data":lens] |
|
| 153 |
|
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsReferentialStabilityMacro.groovy (revision 2105) | ||
|---|---|---|
| 16 | 16 |
import visuAnalec.elements.* |
| 17 | 17 |
import org.txm.searchengine.cqp.CQPSearchEngine |
| 18 | 18 |
|
| 19 |
if (!(corpusViewSelection instanceof CQPCorpus)) {
|
|
| 20 |
println "Corpora selection is not a Corpus" |
|
| 21 |
return; |
|
| 19 |
def selection = [] |
|
| 20 |
for (def s : corpusViewSelections) {
|
|
| 21 |
if (s instanceof CQPCorpus) selection << s |
|
| 22 |
else if (s instanceof Partition) selection.addAll(s.getParts()) |
|
| 22 | 23 |
} |
| 23 | 24 |
|
| 25 |
if (selection.size() == 0) {
|
|
| 26 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
|
| 27 |
return false |
|
| 28 |
} else {
|
|
| 29 |
for (def c : selection) c.compute(false) |
|
| 30 |
} |
|
| 31 |
|
|
| 24 | 32 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE") |
| 25 | 33 |
String schema_ursql |
| 26 | 34 |
|
| ... | ... | |
| 47 | 55 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
| 48 | 56 |
|
| 49 | 57 |
|
| 50 |
def corpus = corpusViewSelection |
|
| 51 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 52 |
|
|
| 53 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 54 |
if (errors.size() > 0) {
|
|
| 55 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors." |
|
| 56 |
return; |
|
| 57 |
} |
|
| 58 |
|
|
| 59 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql) |
|
| 60 |
if (errors.size() > 0) {
|
|
| 61 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors." |
|
| 62 |
return; |
|
| 63 |
} |
|
| 64 |
def CQI = CQPSearchEngine.getCqiClient() |
|
| 65 |
|
|
| 66 |
def prop = corpus.getProperty(word_property) |
|
| 67 |
if (prop == null) { // no CQP property called $word_property
|
|
| 68 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], word_property) |
|
| 58 |
for (def corpus : selection) {
|
|
| 59 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
|
| 60 |
|
|
| 61 |
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql) |
|
| 69 | 62 |
if (errors.size() > 0) {
|
| 70 |
println "** $word_property unit property cannot be computed in the corpus with types: $errors."
|
|
| 63 |
println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
|
|
| 71 | 64 |
return; |
| 72 | 65 |
} |
| 73 |
} |
|
| 74 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
| 75 |
allFormesSet = new HashSet(); |
|
| 76 |
def coefs = [] |
|
| 77 |
int n = 1 |
|
| 78 |
|
|
| 79 |
int nUnitesAllSchemas = 0 |
|
| 80 |
int nUnitesTotalSchemas = 0 |
|
| 81 |
|
|
| 82 |
for (def schema : schemas) {
|
|
| 83 |
def formesSet = new HashSet(); // contient toutes les formes du CR courant |
|
| 84 |
nUnitesTotal = 0; |
|
| 85 | 66 |
|
| 86 |
def allUnites = schema.getUnitesSousjacentesNonTriees()
|
|
| 87 |
|
|
| 88 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
|
|
| 89 |
def nUnites = schema.getUnitesSousjacentes().size()
|
|
| 90 |
def nUnitesTotal = units.size()
|
|
| 91 |
for (def unit : units) {
|
|
| 67 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
|
|
| 68 |
if (errors.size() > 0) {
|
|
| 69 |
println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
|
|
| 70 |
return;
|
|
| 71 |
}
|
|
| 72 |
def CQI = CQPSearchEngine.getCqiClient()
|
|
| 92 | 73 |
|
| 93 |
String forme = null; |
|
| 94 |
if (prop == null) { // word_property is the analec unit property to use
|
|
| 95 |
forme = unit.getProp(word_property) |
|
| 96 |
} else {
|
|
| 97 |
int[] pos = null; |
|
| 98 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
| 99 |
else pos = unit.getDeb()..unit.getFin() |
|
| 100 |
|
|
| 101 |
forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough |
|
| 74 |
def prop = corpus.getProperty(word_property) |
|
| 75 |
if (prop == null) { // no CQP property called $word_property
|
|
| 76 |
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], word_property) |
|
| 77 |
if (errors.size() > 0) {
|
|
| 78 |
println "** $word_property unit property cannot be computed in the corpus with types: $errors." |
|
| 79 |
return; |
|
| 102 | 80 |
} |
| 103 |
|
|
| 104 |
formesSet.add(forme) |
|
| 105 | 81 |
} |
| 82 |
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size); |
|
| 83 |
allFormesSet = new HashSet(); |
|
| 84 |
def coefs = [] |
|
| 85 |
int n = 1 |
|
| 106 | 86 |
|
| 107 |
if (formesSet.size() == 0 || nUnitesTotal == 0) {
|
|
| 108 |
coef = "NA" |
|
| 109 |
} else {
|
|
| 110 |
coef = (nUnitesTotal/formesSet.size()) |
|
| 111 |
} |
|
| 112 |
coefs << coef |
|
| 113 |
allFormesSet.addAll(formesSet) |
|
| 87 |
int nUnitesAllSchemas = 0 |
|
| 88 |
int nUnitesTotalSchemas = 0 |
|
| 114 | 89 |
|
| 115 |
if (schema_display_property_name != null && schema_display_property_name.length() > 0) {
|
|
| 116 |
print schema.getProp(schema_display_property_name) |
|
| 117 |
} else {
|
|
| 118 |
print schema_ursql+"-"+n+" : " |
|
| 119 |
} |
|
| 90 |
println "** $corpus schemas: " |
|
| 91 |
for (def schema : schemas) {
|
|
| 92 |
def formesSet = new HashSet(); // contient toutes les formes du CR courant |
|
| 93 |
nUnitesTotal = 0; |
|
| 94 |
|
|
| 95 |
def allUnites = schema.getUnitesSousjacentesNonTriees() |
|
| 120 | 96 |
|
| 121 |
println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} ${word_property}s = $coef"
|
|
| 122 |
if (show_values) {
|
|
| 123 |
println "\t${word_property}s="+formesSet
|
|
| 97 |
def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql) |
|
| 98 |
def nUnites = schema.getUnitesSousjacentes().size() |
|
| 99 |
def nUnitesTotal = units.size() |
|
| 100 |
for (def unit : units) {
|
|
| 101 |
|
|
| 102 |
String forme = null; |
|
| 103 |
if (prop == null) { // word_property is the analec unit property to use
|
|
| 104 |
forme = unit.getProp(word_property) |
|
| 105 |
} else {
|
|
| 106 |
int[] pos = null; |
|
| 107 |
if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()] |
|
| 108 |
else pos = unit.getDeb()..unit.getFin() |
|
| 109 |
|
|
| 110 |
forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough |
|
| 111 |
} |
|
| 112 |
|
|
| 113 |
formesSet.add(forme) |
|
| 114 |
} |
|
| 115 |
|
|
| 116 |
if (formesSet.size() == 0 || nUnitesTotal == 0) {
|
|
| 117 |
coef = "NA" |
|
| 118 |
} else {
|
|
| 119 |
coef = (nUnitesTotal/formesSet.size()) |
|
| 120 |
} |
|
| 121 |
coefs << coef |
|
| 122 |
allFormesSet.addAll(formesSet) |
|
| 123 |
|
|
| 124 |
if (schema_display_property_name != null && schema_display_property_name.length() > 0) {
|
|
| 125 |
print schema.getProp(schema_display_property_name) |
|
| 126 |
} else {
|
|
| 127 |
print schema_ursql+"-"+n+" : " |
|
| 128 |
} |
|
| 129 |
|
|
| 130 |
println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} ${word_property}s = $coef"
|
|
| 131 |
if (show_values) {
|
|
| 132 |
println "\t${word_property}s="+formesSet
|
|
| 133 |
} |
|
| 134 |
n++ |
|
| 135 |
|
|
| 136 |
nUnitesAllSchemas += nUnites |
|
| 137 |
nUnitesTotalSchemas += nUnitesTotal |
|
| 124 | 138 |
} |
| 125 |
n++ |
|
| 126 | 139 |
|
| 127 |
nUnitesAllSchemas += nUnites |
|
| 128 |
nUnitesTotalSchemas += nUnitesTotal |
|
| 140 |
coef = nUnitesTotalSchemas/allFormesSet.size() |
|
| 141 |
//println "ALL : ($nUnitesAllSchemas units) : $nUnitesTotalSchemas selected units / ${allFormesSet.size()} ${word_property}s = $coef"
|
|
| 142 |
|
|
| 143 |
// return ["result":coefs, "data":["nUnitesTotal":nUnitesTotalSchemas, "allFormesSet":allFormesSet], "coef":(coef)] |
|
| 129 | 144 |
} |
| 130 |
|
|
| 131 |
coef = nUnitesTotalSchemas/allFormesSet.size() |
|
| 132 |
//println "ALL : ($nUnitesAllSchemas units) : $nUnitesTotalSchemas selected units / ${allFormesSet.size()} ${word_property}s = $coef"
|
|
| 133 |
|
|
| 134 |
return ["result":coefs, "data":["nUnitesTotal":nUnitesTotalSchemas, "allFormesSet":allFormesSet], "coef":(coef)] |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsIndexMacro.groovy (revision 2105) | ||
|---|---|---|
| 11 | 11 |
import org.kohsuke.args4j.* |
| 12 | 12 |
import org.txm.Toolbox |
| 13 | 13 |
import org.txm.annotation.urs.* |
| 14 |
import org.txm.lexicaltable.core.functions.LexicalTable |
|
| 14 | 15 |
import org.txm.lexicaltable.core.statsengine.r.data.LexicalTableImpl |
| 15 | 16 |
import org.txm.macro.urs.* |
| 16 | 17 |
import org.txm.rcp.commands.* |
| ... | ... | |
| 36 | 37 |
if (selection.size() == 0) {
|
| 37 | 38 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
| 38 | 39 |
return false |
| 40 |
} else {
|
|
| 41 |
for (def c : selection) c.compute(false) |
|
| 39 | 42 |
} |
| 40 | 43 |
|
| 41 | 44 |
// BEGINNING OF PARAMETERS |
| 42 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
|
|
| 45 |
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
|
|
| 43 | 46 |
String schema_ursql |
| 44 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
|
|
| 47 |
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=false, def="3")
|
|
| 45 | 48 |
int minimum_schema_size |
| 46 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
|
|
| 49 |
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=false, def="9999999")
|
|
| 47 | 50 |
int maximum_schema_size |
| 48 | 51 |
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION") |
| 49 | 52 |
String unit_ursql |
| 50 |
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=true, def="CATEGORIE")
|
|
| 53 |
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=false, def="CATEGORIE")
|
|
| 51 | 54 |
String unit_property_display |
| 52 | 55 |
@Field @Option(name="word_property_display", usage="Word property to display instead of the unit property", widget="String", required=false, def="") |
| 53 | 56 |
String word_property_display |
| 54 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
| 57 |
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=false, def="0")
|
|
| 55 | 58 |
int position_in_schema |
| 56 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
|
|
| 59 |
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
|
|
| 57 | 60 |
cql_limit |
| 58 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
|
|
| 61 |
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true")
|
|
| 59 | 62 |
strict_inclusion |
| 60 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
|
|
| 63 |
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=false, def="0")
|
|
| 61 | 64 |
position_in_matches |
| 62 | 65 |
//@Field @Option(name="output_2D", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true") |
| 63 | 66 |
output_2D = true |
| 64 |
@Field @Option(name="output_showlegend", usage="output barplot or 3D plot", widget="Boolean", required=true, def="true")
|
|
| 67 |
@Field @Option(name="output_showlegend", usage="output barplot or 3D plot", widget="Boolean", required=false, def="true")
|
|
| 65 | 68 |
output_showlegend |
| 66 |
@Field @Option(name="output_fmin", usage="minimal frequency displayed", widget="Integer", required=true, def="0")
|
|
| 69 |
@Field @Option(name="output_fmin", usage="minimal frequency displayed", widget="Integer", required=false, def="0")
|
|
| 67 | 70 |
output_fmin |
| 68 |
@Field @Option(name="output_histogram", usage="show or not a histogram of the result", widget="Boolean", required=true, def="true")
|
|
| 71 |
@Field @Option(name="output_histogram", usage="show or not a histogram of the result", widget="Boolean", required=false, def="true")
|
|
| 69 | 72 |
output_histogram |
| 70 |
@Field @Option(name="output_lexicaltable", usage="create or not a lexical table with the result", widget="Boolean", required=true, def="false")
|
|
| 73 |
@Field @Option(name="output_lexicaltable", usage="create or not a lexical table with the result", widget="Boolean", required=false, def="false")
|
|
| 71 | 74 |
output_lexicaltable |
| 72 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
|
|
| 75 |
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=false, def="OFF")
|
|
| 73 | 76 |
debug |
| 74 | 77 |
if (!ParametersDialog.open(this)) return |
| 75 | 78 |
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3 |
| ... | ... | |
| 238 | 241 |
|
| 239 | 242 |
def lt = null; |
| 240 | 243 |
if (output_lexicaltable) {
|
| 241 |
|
|
| 242 |
if (corpusViewSelection instanceof Partition) {
|
|
| 243 |
lt = new LexicalTableImpl(matrix as DoubleMatrix2D, corpusViewSelection, corpusViewSelection.getCorpus().getProperty("word"),
|
|
| 244 |
rownames, colnames) |
|
| 245 |
lt.setCorpus(corpusViewSelection.getCorpus()); |
|
| 246 |
corpusViewSelection.storeResult(lt) |
|
| 244 |
def parent = selection[0] |
|
| 245 |
println "parent="+parent.getClass() |
|
| 246 |
if (parent instanceof Part) {
|
|
| 247 |
println "part="+parent |
|
| 248 |
parent = parent.getParent() |
|
| 249 |
println "partition="+parent |
|
| 250 |
lt = new LexicalTable(parent); |
|
| 251 |
lt.setData(new LexicalTableImpl(matrix as DoubleMatrix2D, rownames, colnames)) |
|
| 252 |
|
|
| 247 | 253 |
} else {
|
| 248 |
lt = new LexicalTableImpl(matrix as DoubleMatrix2D, corpus.getProperty("word"),
|
|
| 249 |
rownames, colnames) |
|
| 250 |
lt.setCorpus(corpus); |
|
| 251 |
corpus.storeResult(lt) |
|
| 254 |
lt = new LexicalTable(parent); |
|
| 255 |
lt.setData(new LexicalTableImpl(matrix as DoubleMatrix2D, rownames, colnames)) |
|
| 252 | 256 |
} |
| 257 |
lt.setUserName(schema_ursql+">"+unit_ursql) |
|
| 253 | 258 |
} |
| 254 | 259 |
|
| 255 |
|
|
| 256 | 260 |
if (file.exists() && file.getTotalSpace() > 0) {
|
| 257 | 261 |
println "SVG file: "+file.getAbsolutePath() |
| 258 | 262 |
|
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/UnitsDeleteMacro.groovy (revision 2105) | ||
|---|---|---|
| 34 | 34 |
if (selection.size() == 0) {
|
| 35 | 35 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
| 36 | 36 |
return false |
| 37 |
} else {
|
|
| 38 |
for (def c : selection) c.compute(false) |
|
| 37 | 39 |
} |
| 38 | 40 |
|
| 39 | 41 |
// BEGINNING OF PARAMETERS |
| tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/UnitsAnnotateMacro.groovy (revision 2105) | ||
|---|---|---|
| 34 | 34 |
if (selection.size() == 0) {
|
| 35 | 35 |
println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections |
| 36 | 36 |
return false |
| 37 |
} else {
|
|
| 38 |
for (def c : selection) c.compute(false) |
|
| 37 | 39 |
} |
| 38 | 40 |
|
| 39 | 41 |
// BEGINNING OF PARAMETERS |
| ... | ... | |
| 70 | 72 |
def allResults = [:] |
| 71 | 73 |
def errors = new HashSet() |
| 72 | 74 |
for (def corpus : selection) {
|
| 73 |
println "Annotate $corpus..."
|
|
| 75 |
println "Annotating $corpus..."
|
|
| 74 | 76 |
|
| 75 | 77 |
def word = corpus.getWordProperty() |
| 76 | 78 |
def analecCorpus = URSCorpora.getCorpus(corpus) |
| tmp/org.txm.wordcloud.feature/build.properties (revision 2105) | ||
|---|---|---|
| 1 | 1 |
#Fri Jul 06 10:25:20 CEST 2018 |
| 2 | 2 |
bin.includes=feature.xml |
| 3 |
qualifier=svn |
|
| tmp/org.txm.tigersearch.feature/build.properties (revision 2105) | ||
|---|---|---|
| 1 | 1 |
#Fri Jul 06 10:25:18 CEST 2018 |
| 2 | 2 |
bin.includes=feature.xml |
| 3 |
qualifier=svn |
|
| tmp/TXMReleasePlugins.site/site.xml (revision 2105) | ||
|---|---|---|
| 3 | 3 |
<description name="TXM Extensions - RELEASE" url="http://txm.textometrie.org/updates/plugins"> |
| 4 | 4 |
This update site contains the Release plugins for TXM |
| 5 | 5 |
</description> |
| 6 |
<feature url="features/org.txm.backtomedia.feature_1.0.0.2079.jar" id="org.txm.backtomedia.feature" version="1.0.0.2079">
|
|
| 6 |
<feature url="features/org.txm.backtomedia.feature_1.0.0.201905221522.jar" id="org.txm.backtomedia.feature" version="1.0.0.201905221522">
|
|
| 7 | 7 |
<category name="Commands"/> |
| 8 | 8 |
</feature> |
| 9 |
<feature url="features/org.txm.wordcloud.feature_1.0.0.2079.jar" id="org.txm.wordcloud.feature" version="1.0.0.2079">
|
|
| 9 |
<feature url="features/org.txm.wordcloud.feature_1.0.0.201905221522.jar" id="org.txm.wordcloud.feature" version="1.0.0.201905221522">
|
|
| 10 | 10 |
<category name="Commands"/> |
| 11 | 11 |
</feature> |
| 12 |
<feature url="features/org.txm.treetagger.binaries.feature_1.0.0.1978.jar" id="org.txm.treetagger.binaries.feature" version="1.0.0.1978" os="" ws="">
|
|
| 12 |
<feature url="features/org.txm.treetagger.binaries.feature_1.0.0.201905221522.jar" id="org.txm.treetagger.binaries.feature" version="1.0.0.201905221522" os="" ws="">
|
|
| 13 | 13 |
<category name="Annotation"/> |
| 14 | 14 |
</feature> |
| 15 |
<feature url="features/org.txm.treetagger.models.feature_1.0.0.1978.jar" id="org.txm.treetagger.models.feature" version="1.0.0.1978">
|
|
| 15 |
<feature url="features/org.txm.treetagger.models.feature_1.0.0.201905221522.jar" id="org.txm.treetagger.models.feature" version="1.0.0.201905221522">
|
|
| 16 | 16 |
<category name="Annotation"/> |
| 17 | 17 |
</feature> |
| 18 |
<feature url="features/org.txm.annotation.urs.feature_1.0.0.2079.jar" id="org.txm.annotation.urs.feature" version="1.0.0.2079">
|
|
| 18 |
<feature url="features/org.txm.annotation.urs.feature_1.0.0.201905221522.jar" id="org.txm.annotation.urs.feature" version="1.0.0.201905221522">
|
|
| 19 | 19 |
<category name="Annotation"/> |
| 20 | 20 |
</feature> |
| 21 |
<feature url="features/org.txm.tigersearch.feature_1.0.0.2079.jar" id="org.txm.tigersearch.feature" version="1.0.0.2079">
|
|
| 21 |
<feature url="features/org.txm.tigersearch.feature_1.0.0.201905221522.jar" id="org.txm.tigersearch.feature" version="1.0.0.201905221522">
|
|
| 22 | 22 |
<category name="Annotation"/> |
| 23 | 23 |
</feature> |
| 24 | 24 |
<category-def name="Commands" label="Commands"> |
| tmp/org.txm.groovy.core/src/java/org/txm/groovy/core/GSERunner.java (revision 2105) | ||
|---|---|---|
| 161 | 161 |
//System.out.println("GSERunner.createGSERunner(): NO wiring: " + b.getSymbolicName());
|
| 162 | 162 |
} else if (bundleWiring.getClassLoader() != null) {
|
| 163 | 163 |
loaders.add(bundleWiring.getClassLoader()); |
| 164 |
|
|
| 165 | 164 |
//FIXME: debug |
| 166 | 165 |
//System.out.println("GSERunner.createGSERunner(): ADD class loader: " + b.getSymbolicName());
|
| 167 | 166 |
} else {
|
| tmp/org.txm.analec.feature/build.properties (revision 2105) | ||
|---|---|---|
| 1 | 1 |
#Fri Jul 06 10:25:02 CEST 2018 |
| 2 | 2 |
bin.includes=feature.xml |
| 3 |
qualifier=svn |
|
| tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/functions/LexicalTable.java (revision 2105) | ||
|---|---|---|
| 45 | 45 |
public static Integer LEXICALTABLE_COUNTER = new Integer(1); |
| 46 | 46 |
public static String SYMBOL_BASE = "LexicalTable_"; //$NON-NLS-1$ |
| 47 | 47 |
|
| 48 |
/** stores the R table computed */
|
|
| 48 |
/** stores the data computed */
|
|
| 49 | 49 |
protected ILexicalTable statsData; |
| 50 | 50 |
|
| 51 | 51 |
/** |
| ... | ... | |
| 73 | 73 |
protected boolean useAllOccurrences; |
| 74 | 74 |
|
| 75 | 75 |
/** |
| 76 |
* true if the data has been set with lt.setData(...) |
|
| 77 |
*/ |
|
| 78 |
@Parameter(key=LexicalTablePreferences.EXTERN_DATA) |
|
| 79 |
protected boolean pExternData; |
|
| 80 |
|
|
| 81 |
/** |
|
| 76 | 82 |
* Build a LexicalTable with the Subcorpus words and Subcorpus parent remaining words. |
| 77 | 83 |
* |
| 78 | 84 |
* @param corpus |
| ... | ... | |
| 165 | 171 |
return true; |
| 166 | 172 |
} |
| 167 | 173 |
|
| 168 |
|
|
| 169 | 174 |
/** |
| 170 | 175 |
* |
| 171 | 176 |
* @param partition |
| 172 | 177 |
* @throws Exception |
| 173 | 178 |
*/ |
| 174 |
protected void _computeFromPartition(Partition partition) throws Exception {
|
|
| 179 |
protected LexicalTableImpl _computeFromPartition(Partition partition) throws Exception {
|
|
| 175 | 180 |
|
| 176 | 181 |
// parts lexicons |
| 177 | 182 |
List<CQPLexicon> partsLexicons = new ArrayList<CQPLexicon>(); |
| ... | ... | |
| 215 | 220 |
} |
| 216 | 221 |
} catch (RWorkspaceException e) {
|
| 217 | 222 |
e.printStackTrace(); |
| 223 |
return null; |
|
| 218 | 224 |
} |
| 219 |
this.statsData = new LexicalTableImpl(mat, filteredForms.toArray(new String[]{}), partition.getPartNames().toArray(new String[] {}));
|
|
| 220 |
|
|
| 225 |
|
|
| 226 |
return new LexicalTableImpl(mat, filteredForms.toArray(new String[]{}), partition.getPartNames().toArray(new String[] {}));
|
|
| 221 | 227 |
} |
| 222 | 228 |
|
| 223 | 229 |
/** |
| ... | ... | |
| 229 | 235 |
* @return the lexical table |
| 230 | 236 |
* @throws RWorkspaceException the r workspace exception |
| 231 | 237 |
*/ |
| 232 |
protected void _computeFromIndexes(List<PartitionIndex> indexes) throws RWorkspaceException {
|
|
| 238 |
protected LexicalTableImpl _computeFromIndexes(List<PartitionIndex> indexes) throws RWorkspaceException {
|
|
| 233 | 239 |
|
| 234 | 240 |
Log.fine(LexicalTableCoreMessages.bind(LexicalTableCoreMessages.info_buildingLexicalTableWith, indexes)); |
| 235 | 241 |
|
| ... | ... | |
| 237 | 243 |
Partition partition = partIndex.getPartition(); // the Index is computed on a partition |
| 238 | 244 |
|
| 239 | 245 |
if (!partIndex.isComputedWithPartition()) {
|
| 240 |
throw new IllegalArgumentException("Indexes are not computed with a partition. Aborting.");
|
|
| 246 |
throw new IllegalArgumentException("Index is not computed with a partition. Aborting.");
|
|
| 241 | 247 |
} |
| 242 | 248 |
|
| 243 | 249 |
this.property = partIndex.getProperties().get(0); |
| ... | ... | |
| 310 | 316 |
} catch (CqiClientException e) {
|
| 311 | 317 |
// TODO Auto-generated catch block |
| 312 | 318 |
org.txm.utils.logger.Log.printStackTrace(e); |
| 319 |
return null; |
|
| 313 | 320 |
} |
| 314 | 321 |
} |
| 315 | 322 |
//System.out.println("mat size : ["+(rownames.size() + extra)+"]["+colnames.size()+"]");
|
| 316 | 323 |
//System.out.println("rownames size : "+rownames.size());
|
| 317 | 324 |
//System.out.println("colnames size : "+colnames.size());
|
| 318 |
this.statsData = new LexicalTableImpl(mat, rownames.toArray(new String[] {}), colnames.toArray(new String[] {}));
|
|
| 325 |
return new LexicalTableImpl(mat, rownames.toArray(new String[] {}), colnames.toArray(new String[] {}));
|
|
| 319 | 326 |
} |
| 320 | 327 |
|
| 321 | 328 |
|
| 322 | 329 |
@Override |
| 323 | 330 |
protected boolean _compute() throws Exception {
|
| 324 | 331 |
|
| 332 |
if (pExternData) {
|
|
| 333 |
return statsData != null; |
|
| 334 |
} |
|
| 325 | 335 |
// FIXME: SJ: became useless? |
| 326 | 336 |
//this.statsData = null; // reset frequencies |
| 327 | 337 |
|
| ... | ... | |
| 343 | 353 |
|
| 344 | 354 |
this.subTask("Computing from Partition: " + this.parent);
|
| 345 | 355 |
|
| 346 |
this._computeFromPartition((Partition)this.parent); |
|
| 356 |
this.statsData = this._computeFromPartition((Partition)this.parent);
|
|
| 347 | 357 |
} |
| 348 | 358 |
else if (parent instanceof PartitionIndex) {
|
| 349 | 359 |
|
| ... | ... | |
| 355 | 365 |
|
| 356 | 366 |
this.subTask("Computing from Partition Index: " + this.parent);
|
| 357 | 367 |
|
| 358 |
this._computeFromIndexes(Arrays.asList((PartitionIndex)this.parent)); |
|
| 368 |
this.statsData = this._computeFromIndexes(Arrays.asList((PartitionIndex)this.parent));
|
|
| 359 | 369 |
} |
| 360 | 370 |
|
| 361 | 371 |
if (this.statsData == null) { // no statsData computed or given
|
| ... | ... | |
| 686 | 696 |
return LexicalTableCoreMessages.RESULT_TYPE; |
| 687 | 697 |
} |
| 688 | 698 |
|
| 699 |
/** |
|
| 700 |
* Use this method to set manually data to the lexical table |
|
| 701 |
* @param lexicalTableImpl external data to use. If null the table will be computed using its parent |
|
| 702 |
*/ |
|
| 689 | 703 |
public void setData(LexicalTableImpl lexicalTableImpl) {
|
| 690 | 704 |
this.statsData = lexicalTableImpl; |
| 705 |
|
|
| 706 |
this.pExternData = statsData != null; |
|
| 707 |
setAltered(); |
|
| 691 | 708 |
} |
| 692 | 709 |
} |
| tmp/org.txm.lexicaltable.core/src/org/txm/lexicaltable/core/preferences/LexicalTablePreferences.java (revision 2105) | ||
|---|---|---|
| 19 | 19 |
*/ |
| 20 | 20 |
public static final String USE_ALL_OCCURRENCES = "use_all_occurrences"; //$NON-NLS-1$ |
| 21 | 21 |
|
| 22 |
/** |
|
| 23 |
* If true, the table data has been manually set with lt.setData(lexicalTableImpl) |
|
| 24 |
*/ |
|
| 25 |
public static final String EXTERN_DATA = "extern_data"; //$NON-NLS-1$ |
|
| 22 | 26 |
|
| 23 | 27 |
/** |
| 24 | 28 |
* Gets the instance. |
| ... | ... | |
| 39 | 43 |
preferences.putInt(F_MIN, 2); |
| 40 | 44 |
preferences.putInt(V_MAX, 200); |
| 41 | 45 |
preferences.putBoolean(USE_ALL_OCCURRENCES, true); |
| 46 |
preferences.putBoolean(EXTERN_DATA, false); |
|
| 42 | 47 |
preferences.put(UNIT_PROPERTY, DEFAULT_UNIT_PROPERTY); |
| 43 | 48 |
} |
| 44 | 49 |
|
| tmp/org.txm.core/src/java/org/txm/utils/BundleUtils.java (revision 2105) | ||
|---|---|---|
| 223 | 223 |
path = Toolbox.getPreference(TBXPreferences.INSTALL_DIR); |
| 224 | 224 |
} |
| 225 | 225 |
if (path.startsWith("file:")) {
|
| 226 |
path = path.substring(6);
|
|
| 226 |
path = path.substring(5);
|
|
| 227 | 227 |
} |
| 228 | 228 |
return path; |
| 229 | 229 |
} |
Formats disponibles : Unified diff