Revision 2105 tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsReferentialStabilityMacro.groovy

UnitsReferentialStabilityMacro.groovy (revision 2105)
16 16
import visuAnalec.elements.*
17 17
import org.txm.searchengine.cqp.CQPSearchEngine
18 18

  
19
if (!(corpusViewSelection instanceof CQPCorpus)) {
20
	println "Corpora selection is not a Corpus"
21
	return;
19
def selection = []
20
for (def s : corpusViewSelections) {
21
	if (s instanceof CQPCorpus) selection << s
22
	else if (s instanceof Partition) selection.addAll(s.getParts())
22 23
}
23 24

  
25
if (selection.size() == 0) {
26
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
27
	return false
28
} else {
29
	for (def c : selection) c.compute(false)
30
}
31

  
24 32
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
25 33
String schema_ursql
26 34

  
......
47 55
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
48 56

  
49 57

  
50
def corpus = corpusViewSelection
51
def analecCorpus = URSCorpora.getCorpus(corpus)
52

  
53
def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
54
if (errors.size() > 0) {
55
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
56
	return;
57
}
58

  
59
errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
60
if (errors.size() > 0) {
61
	println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
62
	return;
63
}
64
def CQI = CQPSearchEngine.getCqiClient()
65

  
66
def prop = corpus.getProperty(word_property)
67
if (prop == null) { // no CQP property called $word_property
68
	errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], word_property)
58
for (def corpus : selection) {
59
	def analecCorpus = URSCorpora.getCorpus(corpus)
60
	
61
	def errors = AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)
69 62
	if (errors.size() > 0) {
70
		println "** $word_property unit property cannot be computed in the corpus with types: $errors."
63
		println "** The $schema_ursql schema URSQL cannot be computed in the corpus with types: $errors."
71 64
		return;
72 65
	}
73
}
74
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
75
allFormesSet = new HashSet();
76
def coefs = []
77
int n = 1
78

  
79
int nUnitesAllSchemas = 0
80
int nUnitesTotalSchemas = 0
81

  
82
for (def schema : schemas) {
83
	def formesSet = new HashSet(); // contient toutes les formes du CR courant
84
	nUnitesTotal = 0;
85 66
	
86
	def allUnites = schema.getUnitesSousjacentesNonTriees()
87

  
88
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
89
	def nUnites = schema.getUnitesSousjacentes().size()
90
	def nUnitesTotal = units.size()
91
	for (def unit : units) {
67
	errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)
68
	if (errors.size() > 0) {
69
		println "** $unit_ursql unit URSQL cannot be computed in the corpus with types: $errors."
70
		return;
71
	}
72
	def CQI = CQPSearchEngine.getCqiClient()
92 73
	
93
		String forme =  null;
94
		if (prop == null) { // word_property is the analec unit property to use
95
			forme = unit.getProp(word_property)
96
		} else {
97
			int[] pos = null;
98
			if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
99
			else pos = unit.getDeb()..unit.getFin()
100
				
101
			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
74
	def prop = corpus.getProperty(word_property)
75
	if (prop == null) { // no CQP property called $word_property
76
		errors=AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, AnalecUtils.getFilterParameters(unit_ursql)[0], word_property)
77
		if (errors.size() > 0) {
78
			println "** $word_property unit property cannot be computed in the corpus with types: $errors."
79
			return;
102 80
		}
103
		
104
		formesSet.add(forme)
105 81
	}
82
	def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size);
83
	allFormesSet = new HashSet();
84
	def coefs = []
85
	int n = 1
106 86
	
107
	if (formesSet.size() == 0 || nUnitesTotal == 0) {
108
		coef = "NA"
109
	} else {
110
		coef = (nUnitesTotal/formesSet.size())
111
	}
112
	coefs << coef
113
	allFormesSet.addAll(formesSet)
87
	int nUnitesAllSchemas = 0
88
	int nUnitesTotalSchemas = 0
114 89
	
115
	if (schema_display_property_name != null && schema_display_property_name.length() > 0) {
116
		print schema.getProp(schema_display_property_name)
117
	} else {
118
		print schema_ursql+"-"+n+" : "
119
	}
90
	println "** $corpus schemas: "
91
	for (def schema : schemas) {
92
		def formesSet = new HashSet(); // contient toutes les formes du CR courant
93
		nUnitesTotal = 0;
94
		
95
		def allUnites = schema.getUnitesSousjacentesNonTriees()
120 96
	
121
	println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} ${word_property}s = $coef"
122
	if (show_values) {
123
		println "\t${word_property}s="+formesSet
97
		def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
98
		def nUnites = schema.getUnitesSousjacentes().size()
99
		def nUnitesTotal = units.size()
100
		for (def unit : units) {
101
		
102
			String forme =  null;
103
			if (prop == null) { // word_property is the analec unit property to use
104
				forme = unit.getProp(word_property)
105
			} else {
106
				int[] pos = null;
107
				if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
108
				else pos = unit.getDeb()..unit.getFin()
109
					
110
				forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
111
			}
112
			
113
			formesSet.add(forme)
114
		}
115
		
116
		if (formesSet.size() == 0 || nUnitesTotal == 0) {
117
			coef = "NA"
118
		} else {
119
			coef = (nUnitesTotal/formesSet.size())
120
		}
121
		coefs << coef
122
		allFormesSet.addAll(formesSet)
123
		
124
		if (schema_display_property_name != null && schema_display_property_name.length() > 0) {
125
			print schema.getProp(schema_display_property_name)
126
		} else {
127
			print schema_ursql+"-"+n+" : "
128
		}
129
		
130
		println " ($nUnites units) : $nUnitesTotal selected units / ${formesSet.size()} ${word_property}s = $coef"
131
		if (show_values) {
132
			println "\t${word_property}s="+formesSet
133
		}
134
		n++
135
		
136
		nUnitesAllSchemas += nUnites
137
		nUnitesTotalSchemas += nUnitesTotal
124 138
	}
125
	n++
126 139
	
127
	nUnitesAllSchemas += nUnites
128
	nUnitesTotalSchemas += nUnitesTotal
140
	coef = nUnitesTotalSchemas/allFormesSet.size()
141
	//println "ALL : ($nUnitesAllSchemas units) : $nUnitesTotalSchemas selected units / ${allFormesSet.size()} ${word_property}s = $coef"
142
	
143
//	return ["result":coefs, "data":["nUnitesTotal":nUnitesTotalSchemas, "allFormesSet":allFormesSet], "coef":(coef)]
129 144
}
130

  
131
coef = nUnitesTotalSchemas/allFormesSet.size()
132
//println "ALL : ($nUnitesAllSchemas units) : $nUnitesTotalSchemas selected units / ${allFormesSet.size()} ${word_property}s = $coef"
133

  
134
return ["result":coefs, "data":["nUnitesTotal":nUnitesTotalSchemas, "allFormesSet":allFormesSet], "coef":(coef)]

Also available in: Unified diff