Revision 2105 tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/SchemaLengthsMacro.groovy

SchemaLengthsMacro.groovy (revision 2105)
17 17
import org.txm.rcp.commands.*
18 18
import org.txm.statsengine.r.core.RWorkspace
19 19

  
20
if (!(corpusViewSelection instanceof CQPCorpus)) {
21
	println "Corpora selection is not a Corpus"
22
	return;
20
// Flatten the Corpus view selection into a list of corpora: a CQPCorpus is
// kept as is, a Partition contributes all of its parts (s.getParts()), and
// any other kind of selected object is left out.
def selection = []
21
for (def s : corpusViewSelections) {
22
	if (s instanceof CQPCorpus) selection << s
23
	else if (s instanceof Partition) selection.addAll(s.getParts())
23 24
}
24 25

  
26
// Stop early when the selection yielded no corpus: print a hint that echoes
// the raw selection, then end the macro by returning false.
if (selection.size() == 0) {
27
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
28
	return false
29
} else {
30
	// Otherwise call compute(false) on every collected corpus before going further.
	for (def c : selection) c.compute(false)
31
}
32

  
25 33
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
26
String schema_ursql
34
		String schema_ursql
27 35

  
28 36
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
29
int minimum_schema_size
37
		int minimum_schema_size
30 38
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
31
int maximum_schema_size
39
		int maximum_schema_size
32 40
@Field @Option(name="schema_property_display", usage="schema property to show", widget="String", required=true, def="REF")
33
String schema_property_display
41
		String schema_property_display
34 42

  
35 43
@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
36
String unit_ursql
37

  
44
		String unit_ursql
45
// The "output_graph" dialog option is disabled (commented out) and the
// variable is assigned a fixed false instead.
// NOTE(review): as far as this file shows, nothing sets it back to true, so
// the chart section guarded by `if (output_graph)` looks unreachable —
// confirm this is intended.
//@Field @Option(name="output_graph", usage="Show chart", widget="Boolean", required=true, def="false")
46
		output_graph = false
38 47
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
39
debug
48
		debug
40 49

  
41 50
if (!ParametersDialog.open(this)) return;
42 51
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
43 52

  
44
def corpus = corpusViewSelection
45
def analecCorpus = URSCorpora.getCorpus(corpus)
53
for (def corpus : selection) {
54
	def analecCorpus = URSCorpora.getCorpus(corpus)
46 55

  
47
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
48
if (errors.size() > 0) {
49
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
50
	return;
51
}
56
	// Validate schema_ursql against this corpus' schemas; a non-empty result
	// from isPropertyDefined is reported as an error.
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
57
	if (errors.size() > 0) {
58
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
59
		// NOTE(review): this return sits inside the `for (def corpus : selection)` loop, so it
		// ends the macro for every remaining corpus, not just this one — confirm that is intended.
		return;
60
	}
52 61

  
53
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
54
if (errors.size() > 0) {
55
	println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
56
	return;
57
}
62
	// Validate unit_ursql against this corpus' units; a non-empty result
	// from isPropertyDefined is reported as an error.
	errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
63
	if (errors.size() > 0) {
64
		println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
65
		// NOTE(review): this return sits inside the `for (def corpus : selection)` loop, so it
		// ends the macro for every remaining corpus, not just this one — confirm that is intended.
		return;
66
	}
58 67

  
59
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
68
	def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
60 69

  
61
int nSchemas = 0;
70
	int nSchemas = 0;
62 71

  
63
def lens = [:]
64
def lensnames = [:]
65
for (def schema : schemas) {
72
	def lens = [:]
73
	def lensnames = [:]
74
	for (def schema : schemas) {
66 75

  
67
	def allUnites = schema.getUnitesSousjacentesNonTriees()
68
	
69
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
70
	
71
	int nUnites = units.size();
72
		
73
	if (!lens.containsKey(nUnites)) {
74
		lens[nUnites] = 0;
75
		lensnames[nUnites] = [];
76
	}
77
	
78
	lens[nUnites] = lens[nUnites] + 1;
79
	lensnames[nUnites] << schema.getProp(schema_property_display)
80
	nSchemas++;
81
}
76
		def allUnites = schema.getUnitesSousjacentesNonTriees()
82 77

  
83
//println "nSchemas=$nSchemas"
84
def freqs = lens.keySet();
85
freqs.sort();
86
int t = 0;
87
int n = 0;
88
//println "Fréquences ("+freqs.size()+")"
89
for (def f : freqs) {
90
	t += f * lens[f]
91
	n += lens[f]
92
}
78
		def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
93 79

  
94
coef = (t/n)
95
def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
96
def flens = []
97
slens.each { key, value -> value.times { flens << key } }
98
def nbins = flens.size()*2
80
		int nUnites = units.size();
99 81

  
100
def cfreq = 0
101
println "Longueur moyenne des chaînes de référence : $t/$n = "+coef
102
println "Index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
103
slens.each { println it.key+"	"+it.value+"	"+(cfreq+=it.value)+"	"+lensnames[it.key] }
82
		if (!lens.containsKey(nUnites)) {
83
			lens[nUnites] = 0;
84
			lensnames[nUnites] = [];
85
		}
104 86

  
105
def slens2 = slens.sort { a, b -> -a.key <=> -b.key }
87
		lens[nUnites] = lens[nUnites] + 1;
88
		lensnames[nUnites] << schema.getProp(schema_property_display)
89
		nSchemas++;
90
	}
106 91

  
107
def r = RWorkspace.getRWorkspaceInstance()
92
	//println "nSchemas=$nSchemas"
93
	// lens maps a schema length (number of filtered units) to the number of
	// schemas having that length, so despite its name `freqs` holds the
	// distinct lengths (the map keys), not frequencies.
	def freqs = lens.keySet();
94
	// NOTE(review): keySet() is a Set; Groovy's sort() on a non-List collection
	// returns a new sorted List instead of sorting in place, and the result is
	// discarded here, so this statement appears to have no effect (the sums
	// below do not depend on order) — confirm.
	freqs.sort();
95
	// t: total number of units over all schemas; n: total number of schemas.
	int t = 0;
96
	int n = 0;
97
	//println "Fréquences ("+freqs.size()+")"
98
	for (def f : freqs) {
99
		t += f * lens[f]
100
		n += lens[f]
101
	}
108 102

  
109
r.addVectorToWorkspace("len", slens2.keySet() as int[])
110
r.addVectorToWorkspace("freq", slens2.values() as int[])
111
r.addVectorToWorkspace("flen", flens as int[])
103
	// Mean schema length: total number of units (t) divided by the number of schemas (n).
	// coef is assigned without `def`, unlike the other variables around it.
	// NOTE(review): n stays 0 when no schema was selected for this corpus, in
	// which case this division is expected to throw (division by zero) —
	// confirm and guard if empty results are possible.
	coef = (t/n)
104
	// Order the length -> count entries by decreasing count; ties (comparison
	// result 0, which the elvis operator treats as false) fall back to decreasing length.
	def slens = lens.sort { a, b -> -a.value <=> -b.value ?: -a.key <=> -b.key }
105
	// flens lists every length once per schema of that length (in slens order).
	def flens = []
106
	slens.each { key, value -> value.times { flens << key } }
107
	// Number of histogram bins: twice the number of entries in flens.
	def nbins = flens.size()*2
112 108

  
113
def corpusName = corpus.getID()
109
	def cfreq = 0
110
	println "Longueur moyenne des chaînes de référence : $t/$n = "+coef
111
	println "Index hiérarchique des longueurs de chaînes :\nlen\tfreq\tcfreq"
112
	slens.each { println it.key+"	"+it.value+"	"+(cfreq+=it.value)+"	"+lensnames[it.key] }
114 113

  
115
def PNGFile = File.createTempFile("txm", ".png", new File(Toolbox.getTxmHomePath(), "results"))
116
def PNGFilePath = PNGFile.getAbsolutePath()
117
println "PNG file: "+PNGFilePath
114
	def slens2 = slens.sort { a, b -> -a.key <=> -b.key }
115
	if (output_graph) {
116
		def r = RWorkspace.getRWorkspaceInstance()
118 117

  
119
def SVGFile = File.createTempFile("txm", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
120
def SVGFilePath = SVGFile.getAbsolutePath()
121
println "SVG file: "+SVGFilePath
118
		r.addVectorToWorkspace("len", slens2.keySet() as int[])
119
		r.addVectorToWorkspace("freq", slens2.values() as int[])
120
		r.addVectorToWorkspace("flen", flens as int[])
122 121

  
123
/// BEGINNING OF R SCRIPT
124
def script ="""
122
		def corpusName = corpus.getName()
123
		println "corpusName=$corpusName"
124
		def PNGFile = File.createTempFile("txm_"+corpus.getID()+"_", ".png", new File(Toolbox.getTxmHomePath(), "results"))
125
		def PNGFilePath = PNGFile.getAbsolutePath()
126
		println "PNG file: "+PNGFilePath
127

  
128
		def SVGFile = File.createTempFile("txm_"+corpus.getID()+"_", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
129
		def SVGFilePath = SVGFile.getAbsolutePath()
130
		println "SVG file: "+SVGFilePath
131

  
132
		/// BEGINNING OF R SCRIPT
133
		def script ="""
125 134
hist(flen, xaxt='n', col="gray", xlab="Length", breaks=$nbins, main="$corpusName Longueur des chaînes ($nbins bins)")
126 135
axis(side=1, at=len)
127 136
dev.off()
128 137
"""
129
/// END OF R SCRIPT
138
		/// END OF R SCRIPT
130 139

  
131
// execute R script
132
r.plot(PNGFile, "png(file = \"${PNGFilePath}\"); "+script)
133
r.plot(SVGFile, "svglite(file = \"${SVGFilePath}\"); "+script)
140
		// execute R script
141
		r.plot(PNGFile, "png(file = \"${PNGFilePath}\"); "+script)
142
		r.plot(SVGFile, "svg(file = \"${SVGFilePath}\"); "+script)
134 143

  
135
//display the SVG results graphic
136
monitor.syncExec(new Runnable() {
137
	@Override
138
	public void run() { OpenBrowser.openfile(SVGFilePath, corpusName+" Longueur des chaînes") }
139
})
144
		//display the SVG results graphic
145
		monitor.syncExec(new Runnable() {
146
					@Override
147
					public void run() { OpenBrowser.openfile(SVGFile.getAbsolutePath(), corpusName+" Longueur des chaînes") }
148
				})
149
	}
150
	//return ["result":coef, "data":lens]
151
}
140 152

  
141
return ["result":coef, "data":lens]
153

  

Also available in: Unified diff