Révision 2082

tmp/org.txm.analec.rcp/plugin.xml (revision 2082)
22 22
            <menu
23 23
                  id="menu.urs.tools"
24 24
                  label="Tools">
25
               <visibleWhen
26
                     checkEnabled="false">
27
                  <reference
28
                        definitionId="OneCorpusSelected">
29
                  </reference>
30
               </visibleWhen>
31 25
               <menu
32 26
                     label="Annotation">
33
                  <command
34
                        commandId="org.txm.rcp.commands.ExecuteMacro"
35
                        label="Annotate concordance"
36
                        style="push">
37
                     <parameter
38
                           name="org.txm.rcp.command.parameter.file"
39
                           value="org/txm/macro/urs/edit/ConcordanceToUnitMacro.groovy">
40
                     </parameter>
41
                  </command>
42
                  <command
43
                        commandId="org.txm.rcp.commands.ExecuteMacro"
44
                        label="Reset annotation"
45
                        style="push">
46
                     <parameter
47
                           name="org.txm.rcp.command.parameter.file"
48
                           value="org/txm/macro/urs/edit/ResetAnnotationsMacro.groovy">
49
                     </parameter>
50
                  </command>
51
                  <command
52
                        commandId="org.txm.rcp.commands.ExecuteMacro"
53
                        label="SI to Singleton"
54
                        style="push">
55
                     <parameter
56
                           name="org.txm.rcp.command.parameter.file"
57
                           value="org/txm/macro/urs/edit/Si2SingletonMacro.groovy">
58
                     </parameter>
59
                  </command>
60
                  <command
61
                        commandId="org.txm.rcp.commands.ExecuteMacro"
62
                        label="Annotate selection of units"
63
                        style="push">
64
                     <parameter
65
                           name="org.txm.rcp.command.parameter.file"
66
                           value="org/txm/macro/urs/edit/UnitsAnnotateMacro.groovy">
67
                     </parameter>
68
                  </command>
27
                  <dynamic
28
                        class="org.txm.annotation.urs.commands.URSToolsMenuContribution"
29
                        id="edit">
30
                  </dynamic>
69 31
               </menu>
70 32
               <menu
71 33
                     label="Verification">
72
                  <command
73
                        commandId="org.txm.rcp.commands.ExecuteMacro"
74
                        label="Check duplicated units in schema"
75
                        style="push">
76
                     <parameter
77
                           name="org.txm.rcp.command.parameter.file"
78
                           value="org/txm/macro/urs/edit/CheckDuplicatesInSchemasMacro.groovy">
79
                     </parameter>
80
                  </command>
81
                  <command
82
                        commandId="org.txm.rcp.commands.ExecuteMacro"
83
                        label="Check annotation structure values"
84
                        style="push">
85
                     <parameter
86
                           name="org.txm.rcp.command.parameter.file"
87
                           value="org/txm/macro/urs/edit/CheckAnnotationStructureValuesMacro.groovy">
88
                     </parameter>
89
                  </command>
34
                  <dynamic
35
                        class="org.txm.annotation.urs.commands.URSToolsMenuContribution"
36
                        id="check">
37
                  </dynamic>
90 38
               </menu>
91 39
               <menu
92 40
                     label="Exploitation">
93
                  <command
94
                        commandId="org.txm.rcp.commands.ExecuteMacro"
95
                        label="Units summary"
96
                        style="push">
97
                     <parameter
98
                           name="org.txm.rcp.command.parameter.file"
99
                           value="org/txm/macro/urs/exploit/UnitsSummaryMacro.groovy">
100
                     </parameter>
101
                  </command>
102
                  <command
103
                        commandId="org.txm.rcp.commands.ExecuteMacro"
104
                        label="Units list"
105
                        style="push">
106
                     <parameter
107
                           name="org.txm.rcp.command.parameter.file"
108
                           value="org/txm/macro/urs/exploit/UnitsListMacro.groovy">
109
                     </parameter>
110
                  </command>
111
                  <command
112
                        commandId="org.txm.rcp.commands.ExecuteMacro"
113
                        label="Units index"
114
                        style="push">
115
                     <parameter
116
                           name="org.txm.rcp.command.parameter.file"
117
                           value="org/txm/macro/urs/exploit/UnitsIndexMacro.groovy">
118
                     </parameter>
119
                  </command>
120
                  <command
121
                        commandId="org.txm.rcp.commands.ExecuteMacro"
122
                        label="Schemas progression"
123
                        style="push">
124
                     <parameter
125
                           name="org.txm.rcp.command.parameter.file"
126
                           value="org/txm/macro/urs/exploit/SchemasProgressionMacro.groovy">
127
                     </parameter>
128
                  </command>
129
                  <command
130
                        commandId="org.txm.rcp.commands.ExecuteMacro"
131
                        label="Schemas summary"
132
                        style="push">
133
                     <parameter
134
                           name="org.txm.rcp.command.parameter.file"
135
                           value="org/txm/macro/urs/exploit/SchemasSummaryMacro.groovy">
136
                     </parameter>
137
                  </command>
138
               </menu>
139
               <menu
140
                     label="Export">
141
                  <command
142
                        commandId="org.txm.rcp.commands.ExecuteMacro"
143
                        label="Export to Glozz"
144
                        style="push">
145
                     <parameter
146
                           name="org.txm.rcp.command.parameter.file"
147
                           value="org/txm/macro/urs/export/ExportToGlozzMacro.groovy">
148
                     </parameter>
149
                  </command>
150
                  <command
151
                        commandId="org.txm.rcp.commands.ExecuteMacro"
152
                        label="Export as..."
153
                        style="push">
154
                     <parameter
155
                           name="org.txm.rcp.command.parameter.file"
156
                           value="org/txm/macro/urs/export/ExportAsMacro.groovy">
157
                     </parameter>
158
                  </command>
159
               </menu>
160
               <menu
161
                     label="Exploitation2">
162 41
                  <dynamic
163 42
                        class="org.txm.annotation.urs.commands.URSToolsMenuContribution"
164 43
                        id="exploit">
165 44
                  </dynamic>
166 45
               </menu>
167 46
               <menu
168
                     label="Annotation2">
47
                     label="Export">
169 48
                  <dynamic
170 49
                        class="org.txm.annotation.urs.commands.URSToolsMenuContribution"
171
                        id="edit">
50
                        id="export">
172 51
                  </dynamic>
173 52
               </menu>
174 53
            </menu>
tmp/org.txm.analec.rcp/META-INF/MANIFEST.MF (revision 2082)
171 171
 org.txm.macro.urs.democrat,
172 172
 org.txm.macro.urs.edit,
173 173
 org.txm.macro.urs.exploit,
174
 org.txm.macro.urs.exploit.mesures1,
175 174
 org.txm.macro.urs.export,
176 175
 org.txm.macro.urs.misc,
177 176
 visuAnalec,
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckDuplicatesInSchemasMacro.groovy (revision 2082)
1
package org.txm.macro.urs.check
2

  
3
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
4
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
5
// @author mdecorde
6
// @author sheiden
7
// STANDARD DECLARATIONS
8

  
9
import groovy.transform.Field
10

  
11
import org.jfree.chart.JFreeChart
12
import org.kohsuke.args4j.*
13
import org.txm.Toolbox
14
import org.txm.annotation.urs.*
15
import org.txm.macro.urs.AnalecUtils
16
import org.txm.rcp.Application
17
import org.txm.rcp.IImageKeys
18
import org.txm.rcp.swt.widget.parameters.*
19
import org.txm.searchengine.cqp.CQPSearchEngine
20
import org.txm.searchengine.cqp.corpus.*
21
import org.txm.searchengine.cqp.corpus.query.CQLQuery
22

  
23
import visuAnalec.elements.*
24

  
25
// Name of this macro class, used to prefix the console message below
def scriptName = this.class.getSimpleName()

// The macro needs a CQP corpus selected in the Corpora view
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "** $scriptName please select a Corpus to run the macro"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
		String schema_ursql
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
		String schema_property_display
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
		debug
if (!ParametersDialog.open(this)) return

// Turn the debug label chosen in the dialog into a numeric level;
// any other value is left untouched
switch (debug) {
	case "OFF": debug = 0; break
	case "ON": debug = 1; break
	case "ALL": debug = 2; break
	case "REALLY ALL": debug = 3; break
}

def CQI = CQPSearchEngine.getCqiClient()
def corpus = corpusViewSelection
def word = corpus.getWordProperty()
def analecCorpus = URSCorpora.getCorpus(corpus)

// Stop early when the property used to display the schemas is reported as missing
if (!schema_property_display.isEmpty()) {
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql, schema_property_display).size()
	if (errors > 0) {
		println "Error: some Schema types don't contain the $schema_property_display property: $errors"
		return
	}
}

// Select the schemas, then fetch the units grouped by schema
def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus , schema_ursql, -1, Integer.MAX_VALUE)
if (allSchemas.size() == 0) {
	println "No schema match for '$schema_ursql' selection. Aborting"
	return
}

def allUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas)
if (allUnits.size() == 0) {
	println "No unit selection. Aborting"
	return
}

if (debug) println "allUnits=${allUnits.size()}"

// Invert the grouping: for each unit, the list of schemas that contain it
def schemasByUnit = [:]
for (Schema schema : allSchemas) {
	for (def unit : allUnits[schema]) {
		schemasByUnit.get(unit, []) << schema
	}
}

// Keep only the units referenced by at least two schemas
def duplicates = schemasByUnit.findAll { unit, owners -> owners.size() >= 2 }

if (duplicates.size() == 0) {
	println "No duplicates found in $schema_ursql units"
} else {
	println "Duplicates found"
	for (def unit : duplicates.keySet()) {
		println AnalecUtils.toString(CQI, word, unit)+" in: "
		for (Schema schema : duplicates[unit]) {
			println " '"+schema.getProp(schema_property_display)+"'\t"+schema.getProps()
		}
	}
}

return duplicates
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/check/CheckAnnotationStructureValuesMacro.groovy (revision 2082)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.urs.check
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.txm.annotation.urs.*
8
import org.txm.searchengine.cqp.corpus.*
9
import visuAnalec.elements.*
10

  
11
// The macro only accepts a MainCorpus selected in the Corpora view
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="unit_type",usage="", widget="String", required=false, def="MENTION")
String unit_type

@Field @Option(name="unit_property_name", usage="", widget="String", required=false, def="REF")
String unit_property_name

@Field @Option(name="pruneUnusedValues", usage="", widget="Boolean", required=false, def="false")
boolean pruneUnusedValues

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

// END OF PARAMETERS

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus);
def structure = analecCorpus.getStructure()

// The unit type must be declared in the annotation structure
if (!structure.getTypes(Unite.class).contains(unit_type)) {
	println "Missing unit type: $unit_type"
	return
}

// ... and the property must be declared for that unit type
if (!analecCorpus.getStructure().getUniteProperties(unit_type).contains(unit_property_name)) {
	println "No properties '$unit_property_name' in '$unit_type' unit."
	return
}

// Values declared in the structure for the property
def declaredValues = new HashSet()
declaredValues.addAll(structure.getValeursProp(Unite.class, unit_type, unit_property_name));
println "Values stored in the structure: $declaredValues"

// Values actually carried by the units of the corpus
def usedValues = new HashSet();
analecCorpus.getUnites(unit_type).each { Unite unite ->
	usedValues.add(unite.getProp(unit_property_name))
}

// What remains after removing the used values is declared but never used
declaredValues.removeAll(usedValues);
if (declaredValues.size() == 0) {
	println "All the values are used."
} else {
	println "The following values ("+declaredValues.size()+") are not used: "+declaredValues.join(", ")
	if (pruneUnusedValues) {
		println "Pruning the values..."
		declaredValues.each { String val ->
			structure.supprimerVal(Unite.class, unit_type, unit_property_name, val);
		}
	}
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/AllMesuresMacro.groovy (revision 2082)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9

  
10
import groovy.transform.Field
11

  
12
import org.txm.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.apache.commons.lang.StringUtils;
17

  
18
// BEGINNING OF PARAMETERS
19

  
20
@Field @Option(name="tsvFile",usage="", widget="FileSave", required=true, def="result.tsv")
File tsvFile

@Field @Option(name="default_schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String default_schema_ursql

@Field @Option(name="default_minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int default_minimum_schema_size

@Field @Option(name="schema_property_display_name",usage="", widget="String", required=false, def="REF")
String schema_property_display_name

@Field @Option(name="default_unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String default_unit_ursql

@Field @Option(name="default_word_property", usage="", widget="String", required=false, def="word")
String default_word_property
@Field @Option(name="default_pos_property", usage="", widget="String", required=false, def="CATEGORIE")
String default_pos_property

if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

println "Corpora selections: "+corpusViewSelections

// Both lists are script-binding variables (no "def") so that execMesure() can reach them
table = [] // contains all table lines
mesures = [] // measure names registered by execMesure(), written as the TSV header

// Parameters shared by every schema-based measure
def schemaParams = [
	"schema_ursql":default_schema_ursql,
	"minimum_schema_size":default_minimum_schema_size,
]

for (def corpus : corpusViewSelections) { // for each corpus selected in the corpora view
	if (!(corpus instanceof MainCorpus)) continue; // check if the corpus is a maincorpus
	def line = [] ; table << line // create and add a new table line
	line << corpus.getID() // add the corpus name in the first column

	println "*** Computing mesures for $corpus" // each macro return a "result" and a "data"

	def params = [
		"unit_ursql":default_unit_ursql,
	]
	line << execMesure(ReferentialDensityMacro, line, corpus, params)["result"]

	// MeanDistanceMacro declares a required "schema_property_display" option: pass it explicitly
	params = schemaParams + [
		"schema_property_display":schema_property_display_name,
		"unit_ursql":default_unit_ursql,
	]
	line << execMesure(MeanDistanceMacro, line, corpus, params)["result"]

	params = schemaParams + [
		"unit_ursql":default_unit_ursql,
	]
	line << execMesure(NumberOfSchemaMacro, line, corpus, params)["result"]

	params = schemaParams + [
		"schema_property_display_name":schema_property_display_name,
		"unit_ursql":default_unit_ursql+"@CATEGORIE=GN Défini|GN Démonstratif|Nom Propre",
		"word_property":default_word_property,
	]
	line << execMesure(StabilityScoreMacro, line, corpus, params)["result"]

	params = schemaParams + [
		"unit_ursql":default_unit_ursql,
	]
	line << execMesure(InterDistanceMacro, line, corpus, params)["result"]

	params = schemaParams + [
		"unit_ursql":default_unit_ursql,
		"word_property":default_pos_property,
	]
	line << execMesure(NatureOfTheFirstUnitMacro, line, corpus, params)["data"]

	// GrammaticalCategoryMacro names its options "schema_display_property_name" and
	// "property": use those names so the values are actually taken into account
	params = schemaParams + [
		"schema_display_property_name":schema_property_display_name,
		"unit_ursql":default_unit_ursql,
		"property":default_pos_property,
	]
	line << execMesure(GrammaticalCategoryMacro, line, corpus, params)["data"]
}

// WRITE RESULTS IN THE TSV FILE
tsvFile.withWriter("UTF-8") { writer ->
	writer.println "\t"+mesures.join("\t")
	table.each { line -> writer.println line.join("\t")	}
}

println "Done. Results are saved in ${tsvFile.getAbsolutePath()} file."
123

  
124
// UTILITY FUNCTIONS
125
/**
 * Runs one measure macro on a corpus and returns what the macro returned.
 *
 * The measure name (macro class simple name without its "Macro" suffix) is
 * registered in the script-level "mesures" list, which is used as the TSV header.
 *
 * @param mesure the macro class to run
 * @param line the current table line (not modified here; the caller appends the value)
 * @param corpus the corpus given to the macro as "corpusViewSelection"
 * @param params the macro arguments, given to the macro as "args"
 * @return the non-null value returned by the macro
 * @throws Exception if the macro returned null
 */
def execMesure(def mesure, def line, def corpus, def params) {
	def className = mesure.getSimpleName()
	// Strip the "Macro" suffix; keep the whole name when the class is not named *Macro
	int suffix = className.indexOf("Macro")
	def m = suffix >= 0 ? className.substring(0, suffix) : className
	// Register each measure only once: the header must keep one column per measure
	// even when several corpora are processed
	if (!mesures.contains(m)) mesures << m
	println "***** ${mesures.indexOf(m) + 1}- $m with parameters: $params"
	def r = gse.run(mesure, ["args":params, "corpusViewSelection":corpus, "monitor":monitor])
	if (r == null) throw new Exception("Null result returned by the $m measure");
	return r;
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/GrammaticalCategoryMacro.groovy (revision 2082)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.macro.urs.AnalecUtils
12
import visuAnalec.elements.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.*
16
import org.txm.searchengine.cqp.corpus.*
17
import org.apache.commons.lang.StringUtils;
18

  
19
// BEGINNING OF PARAMETERS
20

  
21
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
String schema_display_property_name

@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

@Field @Option(name="property", usage="", widget="String", required=false, def="CATEGORIE")
String property

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
debug

if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3

// END OF PARAMETERS

// Same type as the guard above, so that every accepted selection can be assigned
CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return;
}

def CQI = CQPSearchEngine.getCqiClient()

// When the corpus has no property with that name, "property" is read from the units instead
def prop = corpus.getProperty(property)

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
def allFreqs = [:] // value -> frequency over all the selected schemas
def n = 0
for (def schema : schemas) {
	n++

	def freqs = [:] // value -> frequency in the current schema

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	for (def unit : units) { // no need to sort units

		String forme =  null;
		if (prop == null) { // property is the analec unit property to use
			forme = unit.getProp(property)
		} else {
			// positions from the unit start to the unit end
			int[] pos = (unit.getDeb()..unit.getFin()) as int[]
			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
		}

		freqs[forme] = (freqs[forme] ?: 0) + 1
		allFreqs[forme] = (allFreqs[forme] ?: 0) + 1
	}

	if (schema_display_property_name != null) {
		println "Index des natures de $unit_ursql de '"+schema.getProp(schema_display_property_name)+"' : "
	} else {
		println "Index des natures de $schema_ursql - $n : "
	}

	// decreasing frequency, then increasing value
	for (def forme : freqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
		println forme.key+"\t"+forme.value
	}
}

// "result" is the most frequent value followed by its frequency
int max = 0;
def result = "";

println "Index des natures de $schema_ursql : "
for (def forme : allFreqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
	println forme.key+"\t"+forme.value
	if (max < forme.value) {
		max = forme.value
		// use the entry key: interpolating the entry itself would render "key=value"
		result = "${forme.key}: ${forme.value}"
	}
}

return ["result":result, "data":allFreqs]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsSummaryMacro.groovy (revision 2082)
45 45
		String unit_ursql
46 46
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
47 47
		int limit_distance_in_schema
48
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
48
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=false, def="")
49 49
		limit_cql
50
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
50
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true")
51 51
		boolean strict_inclusion
52 52
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
53 53
		int limit_distance
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemasMacro.groovy (revision 2082)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5

  
6
// STANDARD DECLARATIONS
7
package org.txm.macro.urs.exploit
8

  
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import visuAnalec.elements.*
14
import org.txm.searchengine.cqp.corpus.*
15
import org.txm.macro.urs.AnalecUtils
16
import org.txm.Toolbox
17
import org.txm.rcp.commands.*
18
import org.apache.commons.lang.StringUtils
19
import org.txm.searchengine.cqp.CQPSearchEngine
20

  
21
// The macro needs a CQP corpus selected in the Corpora view
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpus view selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
String schema_ursql

@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
String unit_ursql

@Field @Option(name="word_property", usage="", widget="StringArray", metaVar="word	lemma	frlemma	frolemma	#forme#	id", required=false, def="word")
String word_property

@Field @Option(name="separator", usage="", widget="String", required=true, def=", ")
String separator

@Field @Option(name="buildCQL", usage="générer la requête des unités", widget="Boolean", required=true, def='false')
def buildCQL

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return;

// Turn the debug label chosen in the dialog into a numeric level;
// any other value is left untouched
switch (debug) {
	case "OFF": debug = 0; break
	case "ON": debug = 1; break
	case "ALL": debug = 2; break
	case "REALLY ALL": debug = 3; break
}

CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return;
}

def CQI = CQPSearchEngine.getCqiClient()

// CQL queries are built on the "id" property, plain listings on the requested one
word_prop = corpus.getProperty(buildCQL ? "id" : word_property)

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
schemas.sort { it.getProps() }

// One output line per schema: its properties, then its units
for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	print schema.getProps().toString()+ ": "
	units.eachWithIndex { unit, rank ->

		// positions from the unit start to the unit end
		int[] pos = (unit.getDeb() == unit.getFin()) ? [unit.getDeb()] : (unit.getDeb()..unit.getFin())

		if (buildCQL) {
			// one [prop="value"] token per position, tokens separated by a space
			q = pos.collect { cpos ->
				int[] single = [cpos]
				"["+word_prop+"=\""+CQI.cpos2Str(word_prop.getQualifiedName(), single)[0]+"\"]"
			}.join(" ")
			if (rank > 0) print "|"
			print "("+q+")"
		} else {
			String forme =  null;
			if (word_prop == null) { // word_property is the analec unit property to use
				forme = unit.getProp(word_property)
			} else {
				forme = StringUtils.join(CQI.cpos2Str(word_prop.getQualifiedName(), pos), " ") // ids is enough
			}
			if (rank > 0) print separator
			print forme
		}
	}
	println ""
}
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/MeanDistanceMacro.groovy (revision 2082)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5

  
6
// STANDARD DECLARATIONS
7
package org.txm.macro.urs.exploit
8

  
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.macro.urs.AnalecUtils
14
import visuAnalec.elements.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.txm.Toolbox
17
import org.txm.rcp.commands.*
18
import org.txm.statsengine.r.core.RWorkspace
19

  
20
if (!(corpusViewSelection instanceof CQPCorpus)) {
21
	println "Corpora selection is not a Corpus"
22
	return;
23
}
24

  
25
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
26
String schema_ursql
27

  
28
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
29
int minimum_schema_size
30

  
31
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF")
32
String schema_property_display
33

  
34
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
35
String unit_ursql
36

  
37
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
38
debug
39

  
40
if (!ParametersDialog.open(this)) return;
41
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
42

  
43
def corpus = corpusViewSelection
44
def analecCorpus = URSCorpora.getCorpus(corpus)
45

  
46
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
47
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
48
	return;
49
}
50

  
51
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
52
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
53
	return;
54
}
55

  
56
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
57

  
58
int nSchemas = 0;
59

  
60
def lens = [:]
61
def lensnames = [:]
62
for (def schema : schemas) {
63

  
64
	def allUnites = schema.getUnitesSousjacentesNonTriees()
65
	
66
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
67
	
68
	int nUnites = units.size();
69
		
70
	if (!lens.containsKey(nUnites)) {
71
		lens[nUnites] = 0;
72
		lensnames[nUnites] = [];
73
	}
74
	
75
	lens[nUnites] = lens[nUnites] + 1;
76
	lensnames[nUnites] << schema.getProp(schema_property_display)
77
	nSchemas++;
78
}
79

  
80
//println "nSchemas=$nSchemas"
81
// Abort early when no schema was selected: the mean below would divide by
// zero and R cannot draw a histogram from empty vectors.
if (lens.isEmpty()) {
	println "** No schema selected with the '$schema_ursql' URSQL. Aborting."
	return
}

// Distinct schema lengths in ascending order. On a Set, sort() returns a new
// sorted list instead of sorting in place, so its result has to be kept.
def freqs = lens.keySet().sort()
int t = 0; // total number of units, all schemas together
int n = 0; // total number of schemas
for (def f : freqs) {
	t += f * lens[f] // lens[f] schemas contain f units each
	n += lens[f]
}

// Mean number of units per schema (dividing two integers yields a decimal value in Groovy)
coef = (t/n)
92
def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
93
def flens = []
94
slens.each { key, value -> value.times { flens << key } }
95
def nbins = flens.size()*2
96

  
97
def cfreq = 0
98
println "Longueur moyenne des chaînes de référence : $t/$n = "+coef
99
println "Index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
100
slens.each { println it.key+"	"+it.value+"	"+(cfreq+=it.value)+"	"+lensnames[it.key] }
101

  
102
def slens2 = slens.sort { a, b -> -a.key <=> -b.key }
103

  
104
def r = RWorkspace.getRWorkspaceInstance()
105

  
106
r.addVectorToWorkspace("len", slens2.keySet() as int[])
107
r.addVectorToWorkspace("freq", slens2.values() as int[])
108
r.addVectorToWorkspace("flen", flens as int[])
109

  
110
def corpusName = corpus.getID()
111

  
112
def PNGFile = File.createTempFile("txm", ".png", new File(Toolbox.getTxmHomePath(), "results"))
113
def PNGFilePath = PNGFile.getAbsolutePath()
114
println "PNG file: "+PNGFilePath
115

  
116
def SVGFile = File.createTempFile("txm", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
117
def SVGFilePath = SVGFile.getAbsolutePath()
118
println "SVG file: "+SVGFilePath
119

  
120
/// BEGINNING OF R SCRIPT
121
def script ="""
122
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=$nbins, main="$corpusName Longueur des chaînes ($nbins bins)")
123
axis(side=1, at=len)
124
dev.off()
125
"""
126
/// END OF R SCRIPT
127

  
128
// execute R script
129
r.plot(PNGFile, "png(file = \"${PNGFilePath}\"); "+script)
130
r.plot(SVGFile, "svglite(file = \"${SVGFilePath}\"); "+script)
131

  
132
//display the SVG results graphic
133
monitor.syncExec(new Runnable() {
134
	@Override
135
	public void run() { OpenBrowser.openfile(SVGFilePath, corpusName+" Longueur des chaînes") }
136
})
137

  
138
return ["result":coef, "data":lens]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/InterDistanceMacro.groovy (revision 2082)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.macro.urs.AnalecUtils
12
import visuAnalec.elements.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.apache.commons.lang.StringUtils;
17

  
18
if (!(corpusViewSelection instanceof CQPCorpus)) {
19
	println "Corpora selection is not a Corpus"
20
	return;
21
}
22

  
23
// BEGINNING OF PARAMETERS
24
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
25
String schema_ursql
26

  
27
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
28
int minimum_schema_size
29

  
30
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
31
String schema_display_property_name
32

  
33
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
34
String unit_ursql
35

  
36
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
37
debug
38

  
39
if (!ParametersDialog.open(this)) return;
40
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
41

  
42
CQPCorpus corpus = corpusViewSelection
43
def analecCorpus = URSCorpora.getCorpus(corpus)
44

  
45
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
46
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
47
	return;
48
}
49

  
50
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
51
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
52
	return;
53
}
54

  
55
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
56
def distances = [];
57
def nDistances = 0
58
def cadences = [];
59
// For each selected schema, measure the gaps between its consecutive units.
// Per unit, one value is appended to 'distances' and one to 'cadences', and
// 'nDistances' is incremented once.
// NOTE(review): getDeb()/getFin() are presumably the first and last word positions of a unit - confirm.
for (def schema : schemas) {
60
	
61
	def allUnites = schema.getUnitesSousjacentesNonTriees()
62

  
63
	// keep only the units matching the unit_ursql parameter
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
64
	
65
	// the neighbour lookups below (i-1, i+1) rely on the units being in their natural order
	Collections.sort(units)
66
		
67
	for (int i = 0 ; i < units.size() ; i++) {
68
		// d1: gap between the end of this unit and the start of the next one (stays 0 for the last unit)
		int d1 = 0;
69
		// d2: gap between the end of the previous unit and the start of this one (stays 0 for the first unit)
		int d2 = 0;
70
		if (i < units.size()-1) d1 = units[i+1].getDeb() - units[i].getFin();
71
		if (d1 < 0) {
72
			//println "D1 "+units[i+1].getDeb()+" - "+units[i].getFin()+" = "+d1
73
			d1 = 0; // negative gap: the next unit starts before this one ends, count it as 0
74
		}
75
		if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin();
76
		if (d2 < 0) {
77
			//println "D2 "+units[i].getDeb()+" - "+units[i-1].getFin()+" = "+d2
78
			d2 = 0; // negative gap: this unit starts before the previous one ends, count it as 0
79
		}
80
		// NOTE(review): the last unit of each schema also adds its d1 (0) here, so the mean is
		// computed over as many values as units, not over the number of gaps - confirm this is intended.
		distances << d1
81
		
82
		// cadence of a unit = the smaller of its two gaps; always 0 for the first and the last unit
		if (d1 < d2) cadences << d1 else cadences << d2
83
		
84
		nDistances++
85
	}
86
}
87
// Without any selected unit, sum() returns null (which cannot be stored in an
// int) and the means below would divide by zero: stop with a message instead.
if (nDistances == 0) {
	println "** No unit selected with the '$unit_ursql' URSQL in the '$schema_ursql' schemas. Aborting."
	return
}

// Ascending order: the median and quartile values printed afterwards are read by index
distances = distances.sort()
cadences = cadences.sort()

int distances_total = distances.sum()
int cadences_total = cadences.sum()
coef = (distances_total / nDistances) // mean distance per unit
cadence = (cadences_total / nDistances) // mean cadence per unit
94
println "distances $distances"
95
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
96
println "distance medianne inter-mayonnaise : "+distances[(int)(distances.size() / 2)]
97
println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)]
98
println "cadences $cadences"
99
println "cadence moyenne : $cadences_total / $nDistances = $cadence"
100
println "cadence medianne : "+cadences[(int)(cadences.size() / 2)]
101
println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)]
102

  
103
return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/NatureOfTheFirstUnitMacro.groovy (revision 2082)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.macro.urs.AnalecUtils
12
import visuAnalec.elements.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.txm.searchengine.cqp.CQPSearchEngine
17
import org.apache.commons.lang.StringUtils;
18

  
19
if (!(corpusViewSelection instanceof CQPCorpus)) {
20
	println "Corpora selection is not a Corpus"
21
	return;
22
}
23

  
24
// BEGINNING OF PARAMETERS
25
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
26
String schema_ursql
27

  
28
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
29
int minimum_schema_size
30

  
31
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
32
String unit_ursql
33

  
34
@Field @Option(name="word_property", usage="", widget="String", required=false, def="CATEGORIE")
35
String word_property
36

  
37
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
38
debug
39

  
40
if (!ParametersDialog.open(this)) return;
41
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
42

  
43

  
44
CQPCorpus corpus = corpusViewSelection
45
def analecCorpus = URSCorpora.getCorpus(corpus)
46

  
47
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
48
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
49
	return;
50
}
51

  
52
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
53
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
54
	return;
55
}
56

  
57
def CQI = CQPSearchEngine.getCqiClient()
58

  
59
def prop = corpus.getProperty(word_property)
60

  
61
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
62
def freqs = [:]
63

  
64
// Count, over all the selected schemas, the value observed on the first unit
// of each schema. 'freqs' maps each value to its number of occurrences.
for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()

	// keep only the units matching the unit_ursql parameter
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	if (units.size() == 0) continue; // nothing selected in this schema

	// The underlying units are fetched unsorted ("NonTriees"): put them in their
	// natural order, as InterDistanceMacro does, so that units[0] is the first one.
	Collections.sort(units)
	def unit = units[0]

	String forme = null;
	if (prop == null) { // word_property is the analec unit property to use
		forme = unit.getProp(word_property)
	} else { // word_property is a corpus property: join its values over the positions of the unit
		int[] pos = null;
		if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
		else pos = unit.getDeb()..unit.getFin()

		forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
	}

	if (!freqs.containsKey(forme)) freqs[forme] = 0;

	freqs[forme] = freqs[forme] + 1;
}
93

  
94
println "Index des natures de premier maillon :"
95
int max = 0;
96
def result = "";
97
for (def forme : freqs.keySet().sort() {it -> -freqs[it]}) {
98
	println "$forme\t"+freqs[forme]
99
	if (max < freqs[forme]) {
100
		max = freqs[forme]
101
		result = "$forme: "+freqs[forme]
102
	}
103
}
104

  
105
["result": result, "data": freqs]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/NumberOfSchemaMacro.groovy (revision 2082)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import visuAnalec.elements.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.macro.urs.AnalecUtils
15

  
16
if (!(corpusViewSelection instanceof CQPCorpus)) {
17
	println "Corpora selection is not a Corpus"
18
	return;
19
}
20

  
21
// BEGINNING OF PARAMETERS
22
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
23
String schema_ursql
24

  
25
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
26
int minimum_schema_size
27

  
28
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
29
String unit_ursql
30

  
31
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
32
debug
33

  
34
if (!ParametersDialog.open(this)) return;
35
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
36

  
37

  
38
CQPCorpus corpus = corpusViewSelection
39
def analecCorpus = URSCorpora.getCorpus(corpus)
40

  
41
// check Schema parameters
42
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
43
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
44
	return;
45
}
46

  
47
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
48
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
49
	return;
50
}
51

  
52
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
53

  
54
int nSchemas = schemas.size();
55

  
56
println "Nombre de chaînes de référence d'un texte : $nSchemas"
57

  
58
["result":nSchemas, "data":schemas]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/ReferentialDensityMacro.groovy (revision 2082)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import visuAnalec.elements.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.macro.urs.AnalecUtils
15

  
16
if (!(corpusViewSelection instanceof CQPCorpus)) {
17
	println "Corpora selection is not a Corpus"
18
	return;
19
}
20

  
21
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
22
String unit_ursql
23
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
24
limit_cql
25
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
26
strict_inclusion
27
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
28
limit_distance
29

  
30
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
31
debug
32

  
33
if (!ParametersDialog.open(this)) return;
34
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
35

  
36

  
37
CQPCorpus corpus = corpusViewSelection
38
def analecCorpus = URSCorpora.getCorpus(corpus);
39

  
40
int nMots = corpus.getSize();
41

  
42
def units = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, "", 0, 0,
43
	unit_ursql, 0, limit_cql, strict_inclusion, limit_distance);
44

  
45
int nUnites = units.size();
46

  
47
coef = (nUnites /nMots)
48
println "Densité référentielle : nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
49
if (nUnites >= nMots) {
50
	println "WARNING: possible encoding error. Number of units ($nUnites) is greater than number of words ($nMots)"
51
}
52
return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/StabilityScoreMacro.groovy (revision 2082)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.rcp.swt.widget.parameters.*
12
import org.txm.annotation.urs.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.apache.commons.lang.StringUtils;
15
import org.txm.macro.urs.AnalecUtils
16
import visuAnalec.elements.*
17
import org.txm.searchengine.cqp.CQPSearchEngine
18

  
19
if (!(corpusViewSelection instanceof CQPCorpus)) {
20
	println "Corpora selection is not a Corpus"
21
	return;
22
}
23

  
24
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
25
String schema_ursql
26

  
27
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
28
int minimum_schema_size
29

  
30
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
31
String schema_display_property_name
32

  
33
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
34
String unit_ursql
35

  
36
@Field @Option(name="word_property", usage="", widget="String", required=false, def="word")
37
String word_property
38

  
39
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
40
debug
41

  
42
if (!ParametersDialog.open(this)) return;
43
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
44

  
45

  
46
def corpus = corpusViewSelection
47
def analecCorpus = URSCorpora.getCorpus(corpus)
48

  
49
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
50
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
51
	return;
52
}
53

  
54
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
55
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
56
	return;
57
}
58

  
59
def CQI = CQPSearchEngine.getCqiClient()
60

  
61
def prop = corpus.getProperty(word_property)
62

  
63
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
64
allFormesSet = new HashSet();
65
nUnitesGrandTotal = 0;
66
def coefs = []
67
int n = 1
68
for (def schema : schemas) {
69
	def formesSet = new HashSet(); // contient toutes les formes du CR courant
70
	nUnitesTotal = 0;
71
	
72
	def allUnites = schema.getUnitesSousjacentesNonTriees()
73

  
74
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
75
	def nUnites = units.size()
76
	for (def unit : units) {
77
	
78
		String forme =  null;
79
		if (prop == null) { // word_property is the analec unit property to use
80
			forme = unit.getProp(word_property)
81
		} else {
82
			int[] pos = null;
83
			if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
84
			else pos = unit.getDeb()..unit.getFin()
85
				
86
			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
87
		}
88
		
89
		formesSet.add(forme)
90
		
91
		nUnitesTotal++
92
	}
93
	if (formesSet.size() == 0) {
94
		coef = -1
95
	} else {
96
		coef = (nUnitesTotal/formesSet.size())
97
	}
98
	coefs << coef
99
	nUnitesGrandTotal += nUnitesTotal;
100
	allFormesSet.addAll(formesSet)
101
	
102
	if (schema_display_property_name != null) {
103
		print schema.getProp(schema_display_property_name)
104
	} else {
105
		print schema_ursql+"-"+n+" : "
106
	}
107
	
108
	println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} forms = $coef"
109
	n++
110
}
111

  
112
// Overall stability score: selected units per distinct form, all schemas merged.
// When no form was collected (no schema or no unit selected) the division is
// impossible: report -1, the convention already used for the per-schema scores.
def globalCoef = -1
if (allFormesSet.size() > 0) globalCoef = (nUnitesGrandTotal/allFormesSet.size())
return ["result":coefs, "data":["nUnitesTotal":nUnitesGrandTotal, "allFormesSet":allFormesSet], "coef":globalCoef]
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/CheckDuplicatesInSchemasMacro.groovy (revision 2082)
1
package org.txm.macro.urs.edit
2

  
3
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
4
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
5
// @author mdecorde
6
// @author sheiden
7
// STANDARD DECLARATIONS
8

  
9
import groovy.transform.Field
10

  
11
import org.jfree.chart.JFreeChart
12
import org.kohsuke.args4j.*
13
import org.txm.Toolbox
14
import org.txm.annotation.urs.*
15
import org.txm.macro.urs.AnalecUtils
16
import org.txm.rcp.Application
17
import org.txm.rcp.IImageKeys
18
import org.txm.rcp.swt.widget.parameters.*
19
import org.txm.searchengine.cqp.CQPSearchEngine
20
import org.txm.searchengine.cqp.corpus.*
21
import org.txm.searchengine.cqp.corpus.query.CQLQuery
22

  
23
import visuAnalec.elements.*
24

  
25
def scriptName = this.class.getSimpleName()
26

  
27
if (!(corpusViewSelection instanceof CQPCorpus)) {
28
	println "** $scriptName please select a Corpus to run the macro"
29
	return;
30
}
31

  
32
// BEGINNING OF PARAMETERS
33
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
34
		String schema_ursql
35
@Field @Option(name="schema_property_display", usage="PROP", widget="String", required=true, def="REF")
36
		String schema_property_display
37
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
38
		debug
39
if (!ParametersDialog.open(this)) return
40
	if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
41

  
42
def CQI = CQPSearchEngine.getCqiClient()
43

  
44
def corpus = corpusViewSelection
45

  
46
def word = corpus.getWordProperty()
47
def analecCorpus = URSCorpora.getCorpus(corpus)
48

  
49
if (schema_property_display.length() > 0) {
50
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql, schema_property_display).size()
51
	if (errors > 0) {
52
		println "Error: some Schema types don't contain the $schema_property_display property: $errors"
53
		return
54
	}
55
}
56

  
57
def allUnits = [:]
58

  
59
def allSchemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus , schema_ursql, -1, Integer.MAX_VALUE)
60

  
61
if (allSchemas.size() == 0) {
62
	println "No schema match for '$schema_ursql' selection. Aborting"
63
	return
64
}
65

  
66
allUnits = AnalecUtils.groupAllUnitesInElements(debug, allSchemas)
67

  
68
if (allUnits.size() == 0) {
69
	println "No unit selection. Aborting"
70
	return
71
}
72

  
73
if (debug) println "allUnits=${allUnits.size()}"
74

  
75
// Index every unit by the list of schemas that contain it
def duplicates = [:]
allSchemas.each { Schema owner ->
	allUnits[owner].each { member ->
		if (!duplicates.containsKey(member)) duplicates[member] = []
		duplicates[member] << owner
	}
}

// Drop the units found in fewer than two schemas. The keys are copied first
// because entries are removed from the map while walking through them.
def candidates = new ArrayList(duplicates.keySet())
candidates.each { member ->
	if (duplicates[member].size() < 2) duplicates.remove(member)
}
89

  
90
if (duplicates.size() > 0) {
91
	println "Duplicates found"
92
	for (def unit : duplicates.keySet()) {
93
		println AnalecUtils.toString(CQI, word, unit)+" in: "
94
		for (Schema schema : duplicates[unit]) {
95
			println " '"+schema.getProp(schema_property_display)+"'\t"+schema.getProps()
96
		}
97
	}
98
} else {
99
	println "No duplicates found in $schema_ursql units"
100
}
101

  
102
return duplicates
tmp/org.txm.analec.rcp/src/org/txm/macro/urs/edit/CheckAnnotationStructureValuesMacro.groovy (revision 2082)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.urs.edit
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.rcp.swt.widget.parameters.*
7
import org.txm.annotation.urs.*
8
import org.txm.searchengine.cqp.corpus.*
9
import visuAnalec.elements.*
10

  
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff