Revision 2105 tmp/org.txm.analec.rcp/src/org/txm/macro/urs/exploit/UnitsReferentialDensityMacro.groovy

UnitsReferentialDensityMacro.groovy (revision 2105)
13 13
import org.txm.searchengine.cqp.corpus.*
14 14
import org.txm.macro.urs.AnalecUtils
15 15

  
16
if (!(corpusViewSelection instanceof CQPCorpus)) {
17
	println "Corpora selection is not a Corpus"
18
	return;
16
def selection = []
17
for (def s : corpusViewSelections) {
18
	if (s instanceof CQPCorpus) selection << s
19
	else if (s instanceof Partition) selection.addAll(s.getParts())
19 20
}
20 21

  
21
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
22
if (selection.size() == 0) {
23
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
24
	return false
25
} else {
26
	for (def c : selection) c.compute(false)
27
}
28

  
29
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
22 30
String schema_ursql
23
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
31
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=false, def="3")
24 32
int minimum_schema_size
25
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
33
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=false, def="9999999")
26 34
int maximum_schema_size
27 35
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
28 36
String unit_ursql
29
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
37
@Field @Option(name="position_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=false, def="0")
30 38
int position_in_schema
31
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
39
@Field @Option(name="cql_limit", usage="CQL to build structure limits", widget="Query", required=false, def="")
32 40
cql_limit
33
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
41
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=false, def="true")
34 42
strict_inclusion
35
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
43
@Field @Option(name="position_in_matches", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=false, def="0")
36 44
position_in_matches
37 45

  
38 46
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
......
42 50
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
43 51

  
44 52

  
45
CQPCorpus corpus = corpusViewSelection
53
for (def corpus : selection) {
46 54
def analecCorpus = URSCorpora.getCorpus(corpus);
47 55

  
48 56
int nMots = corpus.getSize();
......
53 61
int nUnites = units.size();
54 62

  
55 63
coef = (nUnites /nMots)
56
println "Densité référentielle : nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
64
println "$corpus referential density: nUnites/nMots = $nUnites/$nMots = $coef = ${coef*100}%"
57 65
if (nUnites >= nMots) {
58
	println "WARNING: possible encoding error. Number of units ($nUnites) is greater than number of words ($nMots)"
66
	println "WARNING: possible encoding error in $corpus. Number of units ($nUnites) is greater than number of words ($nMots)"
59 67
}
60
return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
68
//return ["result":coef, "data":["nUnites":nUnites, "nMots":nMots]]
69
}

Also available in: Unified diff