Revision 2105 tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsInterdistanceMacro.groovy

UnitsInterdistanceMacro.groovy (revision 2105)
15 15
import org.txm.searchengine.cqp.corpus.*
16 16
import org.apache.commons.lang.StringUtils;
17 17

  
18
if (!(corpusViewSelection instanceof CQPCorpus)) {
19
	println "Corpora selection is not a Corpus"
20
	return;
18
def selection = []
19
for (def s : corpusViewSelections) {
20
	if (s instanceof CQPCorpus) selection << s
21
	else if (s instanceof Partition) selection.addAll(s.getParts())
21 22
}
22 23

  
24
if (selection.size() == 0) {
25
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
26
	return false
27
} else {
28
	for (def c : selection) c.compute(false)
29
}
30

  
23 31
// BEGINNING OF PARAMETERS
24 32
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
25 33
String schema_ursql
......
40 48
if (!ParametersDialog.open(this)) return;
41 49
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
42 50

  
43
CQPCorpus corpus = corpusViewSelection
44
def analecCorpus = URSCorpora.getCorpus(corpus)
45

  
46
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
47
if (errors.size() > 0) {
48
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
49
	return;
50
}
51

  
52
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
53
if (errors.size() > 0) {
54
	println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
55
	return;
56
}
57

  
58
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
59
def distances = [];
60
def nDistances = 0
61
def cadences = [];
62
for (def schema : schemas) {
51
for (def corpus : selection) {
52
	def analecCorpus = URSCorpora.getCorpus(corpus)
63 53
	
64
	def allUnites = schema.getUnitesSousjacentesNonTriees()
65

  
66
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
54
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
55
	if (errors.size() > 0) {
56
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
57
		return;
58
	}
67 59
	
68
	Collections.sort(units)
60
	errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
61
	if (errors.size() > 0) {
62
		println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
63
		return;
64
	}
65
	
66
	def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
67
	def distances = [];
68
	def nDistances = 0
69
	def cadences = [];
70
	for (def schema : schemas) {
69 71
		
70
	for (int i = 0 ; i < units.size() ; i++) {
71
		int d1 = 0;
72
		int d2 = 0;
73
		if (i < units.size()-1) d1 = units[i+1].getDeb() - units[i].getFin();
74
		if (d1 < 0) {
75
			//println "D1 "+units[i+1].getDeb()+" - "+units[i].getFin()+" = "+d1
76
			d1 = 0; // negative gap: the next unit's getDeb() is before this unit's getFin(), so clamp to 0
72
		def allUnites = schema.getUnitesSousjacentesNonTriees()
73
	
74
		def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
75
		
76
		Collections.sort(units)
77
			
78
		for (int i = 0 ; i < units.size() ; i++) {
79
			int d1 = 0;
80
			int d2 = 0;
81
			if (i < units.size()-1) d1 = units[i+1].getDeb() - units[i].getFin();
82
			if (d1 < 0) {
83
				//println "D1 "+units[i+1].getDeb()+" - "+units[i].getFin()+" = "+d1
84
				d1 = 0; // negative gap: the next unit's getDeb() is before this unit's getFin(), so clamp to 0
85
			}
86
			if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin();
87
			if (d2 < 0) {
88
				//println "D2 "+units[i].getDeb()+" - "+units[i-1].getFin()+" = "+d2
89
				d2 = 0; // negative gap: this unit's getDeb() is before the previous unit's getFin(), so clamp to 0
90
			}
91
			distances << d1
92
			
93
			if (d1 < d2) cadences << d1 else cadences << d2
94
			
95
			nDistances++
77 96
		}
78
		if (i > 0) d2 = units[i].getDeb() - units[i-1].getFin();
79
		if (d2 < 0) {
80
			//println "D2 "+units[i].getDeb()+" - "+units[i-1].getFin()+" = "+d2
81
			d2 = 0; // negative gap: this unit's getDeb() is before the previous unit's getFin(), so clamp to 0
82
		}
83
		distances << d1
84
		
85
		if (d1 < d2) cadences << d1 else cadences << d2
86
		
87
		nDistances++
88 97
	}
98
	distances = distances.sort()
99
	cadences = cadences.sort()
100
	
101
	int distances_total = distances.sum()
102
	int cadences_total = cadences.sum()
103
	coef = (distances_total / nDistances)
104
	cadence = (cadences_total / nDistances)
105
	
106
	println "$corpus distances:"
107
	//println "distances $distances"
108
	println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
109
	println "distance medianne inter-mayonnaise : "+distances[(int)(distances.size() / 2)]
110
	println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)]
111
	//println "cadences $cadences"
112
	println "cadence moyenne : $cadences_total / $nDistances = $cadence"
113
	println "cadence medianne : "+cadences[(int)(cadences.size() / 2)]
114
	println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)]
115
	
116
	//return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]
89 117
}
90
distances = distances.sort()
91
cadences = cadences.sort()
92

  
93
int distances_total = distances.sum()
94
int cadences_total = cadences.sum()
95
coef = (distances_total / nDistances)
96
cadence = (cadences_total / nDistances)
97
println "distances $distances"
98
println "distance moyenne inter-mayonnaise : $distances_total / $nDistances = $coef"
99
println "distance medianne inter-mayonnaise : "+distances[(int)(distances.size() / 2)]
100
println "distance quartils : "+distances[0]+" "+distances[(int)(distances.size() / 4)] + " "+distances[(int)(distances.size() / 2)]+" "+distances[(int)(3*distances.size() / 4)]+" "+distances[(int)(distances.size() -1)]
101
println "cadences $cadences"
102
println "cadence moyenne : $cadences_total / $nDistances = $cadence"
103
println "cadence medianne : "+cadences[(int)(cadences.size() / 2)]
104
println "cadence quartils : "+cadences[0]+" "+cadences[(int)(cadences.size() / 4)] + " "+cadences[(int)(cadences.size() / 2)]+" "+cadences[(int)(3*cadences.size() / 4)]+" "+cadences[(int)(cadences.size() -1)]
105

  
106
return ["result":coef, "result2":cadence, "data":["distances":distances, "nDistances":nDistances, "cadences":cadences]]

Also available in: Unified diff