Révision 3590

TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/cqp/StructIndexMacro.groovy (revision 3590)
1
// Copyright © 2023 ENS de Lyon
2
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html)
3
// @author sheiden
4

  
5
// STANDARD DECLARATIONS
6
package org.txm.macro.cqp
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.macro.cqp.CQPUtils
12
import org.txm.searchengine.cqp.CQPSearchEngine
13

  
14
// Name of this macro, used as a prefix in the messages printed below.
scriptName = this.class.getSimpleName()

// BEGINNING OF PARAMETERS

// Structure and properties to index, written "struct@prop1,prop2...":
// the part before '@' is the structure name, the part after it is a
// comma-separated list of properties of that structure.
@Field @Option(name="struct_props", usage="struct@prop1,prop2...", widget="String", required=true, def="")
def struct_props

// Maximum number of lines printed; the script treats 0 as "no limit".
@Field @Option(name="Vmax", usage="maximum number of values displayed (use '0' to display all)", widget="Integer", required=true, def="100")
def Vmax

// When true, identical values are counted and printed as "value<TAB>frequency";
// when false, one line is printed per structure occurrence.
@Field @Option(name="groupByValue", usage="group results by value before counting", widget="Boolean", required=false, def="true")
def groupByValue

// Open the parameters input dialog box; stop silently if the user cancels it.
if (!ParametersDialog.open(this)) return
29

  
30
// END OF PARAMETERS
31

  
32
utils = new CQPUtils()
corpusEngine = CQPSearchEngine.getCqiClient()

// check for a corpus selection
corpora = utils.getCorpora(this)

// Exactly one corpus is required. An empty selection must be rejected here too,
// otherwise corpora[0] is null below and the macro dies with a NullPointerException
// instead of printing the explanatory message.
if ((corpora == null) || corpora.size() != 1) {
	println "** $scriptName: please select a corpus in the Corpus view or provide a corpus name. Aborting."
	return false
}

// check for struct_props parameter
if ((struct_props == null) || struct_props.size() == 0) {
	println "** $scriptName: please set the 'struct_props' parameter. Aborting."
	return false
}

// Structural attributes are addressed through the name of the main corpus.
corpus = corpora[0].getMainCorpus().getName()

globalVmax = Vmax
51

  
52
// Split "struct@prop1,prop2" into the structure name and the raw property list.
// 'prop' stays null when the '@' part is missing.
(struct, prop) = struct_props.tokenize('@')*.trim()

if ((prop == null) || prop.size() == 0) {
	println "** $scriptName: please set @properties in the 'struct_props' parameter."
} else {

	// tokenize() already yields a one-element list when there is no comma.
	def props = prop.tokenize(',')*.trim()

	// The number of structures is read from the first property's attribute.
	def structCount = corpusEngine.attributeSize("${corpus}.${struct}_${props[0]}")

	// One entry per structure: the values of all requested properties joined by '_'.
	// The half-open range is empty when there is no structure, whereas 0..-1 would
	// be a reverse range querying the invalid indices 0 and -1.
	occs = (0..<structCount).collect { idx ->
		props.collect { p ->
			corpusEngine.struc2Str("${corpus}.${struct}_${p}", [idx] as int[])[0]
		}.join('_')
	}

	if (groupByValue) {
		// Frequency list ordered by decreasing count, then by value; this comparator
		// fully determines the order, so no preliminary sort is needed.
		def counts = occs.countBy { it }.sort { a, b -> b.value <=> a.value ?: a.key <=> b.key }
		// Vmax == 0 means "display everything".
		def shown = (globalVmax == 0) ? counts : counts.take(globalVmax)
		shown.each { println sprintf("%s\t%d", it.key, it.value) }
	} else {
		// Raw listing, in structure order.
		def shown = (globalVmax == 0) ? occs : occs.take(globalVmax)
		shown.each { println it }
	}
}
85

  
TXM/trunk/org.txm.groovy.core/src/groovy/org/txm/macro/cqp/StructsIndexMacro.groovy (revision 3590)
1
// Copyright © 2023 ENS de Lyon
2
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html)
3
// @author sheiden
4

  
5
// STANDARD DECLARATIONS
6
package org.txm.macro.cqp
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.macro.cqp.CQPUtils
12
import org.txm.searchengine.cqp.CQPSearchEngine
13

  
14
// Name of this macro, used as a prefix in the messages printed below.
scriptName = this.class.getSimpleName()

// BEGINNING OF PARAMETERS

// Comma-separated list of "struct@prop1_prop2" specifications.
@Field
@Option(name = "structs_props", usage = "struct1@prop1_prop2,struct2@prop3_prop4...", widget = "String", required = true, def = "")
def structs_props

// Maximum number of values displayed; 0 displays all of them.
@Field
@Option(name = "Vmax", usage = "maximum number of values displayed (use '0' to display all)", widget = "Integer", required = true, def = "0")
def Vmax

// Open the parameters input dialog box; stop if the user cancels it.
if (!ParametersDialog.open(this)) return
30

  
31
// END OF PARAMETERS
32

  
33
utils = new CQPUtils()
corpusEngine = CQPSearchEngine.getCqiClient()

// check for a corpus selection
corpora = utils.getCorpora(this)

// Exactly one corpus is required. An empty selection must be rejected here too,
// otherwise corpora[0] is null below and the macro dies with a NullPointerException
// instead of printing the explanatory message.
if ((corpora == null) || corpora.size() != 1) {
    println "** $scriptName: please select a corpus in the Corpus view or provide a corpus name. Aborting."
    return false
}

// check for structs_props parameter
if ((structs_props == null) || structs_props.size() == 0) {
    println "** $scriptName: please set the 'structs_props' parameter. Aborting."
    return false
}

corpus_name = corpora[0].getMainCorpus().getName()
corpus_size = corpora[0].getMainCorpus().getSize()

// One "struct@props" specification per comma-separated item; tokenize() already
// returns a one-element list when the parameter contains no comma.
struct_props = structs_props.tokenize(',')*.trim()

globalVmax = Vmax
60

  
61
/**
 * Recursively prints one tab-separated line per chain of nested structures.
 *
 * The first specification of struct_prop selects a structure and its properties.
 * Every occurrence of that structure whose bounds (as returned by struc2Cpos) lie
 * within [start, end] contributes its property values, joined by '_', as a new
 * column; the remaining specifications are then resolved inside that occurrence.
 * When no specification is left, the accumulated line is printed.
 *
 * At each level at most globalVmax occurrences are followed (0 means no limit).
 *
 * @param start       lower bound of the enclosing span
 * @param end         upper bound of the enclosing span
 * @param struct_prop list of remaining "struct@prop1_prop2" specifications
 * @param output      columns accumulated so far ("" at the top level)
 */
def printPropValues(start, end, struct_prop, output) {

    // Every specification has been consumed: emit the finished line.
    if (struct_prop.size() == 0) {
        println output
        return
    }

    // All working variables are declared with 'def' so that recursive calls
    // do not share state through the script binding.
    def s_p = struct_prop[0].tokenize('@')*.trim()
    def struct = s_p[0]
    def prop = s_p[1]

    if ((prop == null) || prop.size() == 0) {
        println "** $scriptName: please set @properties in the 'structs_props' parameter."
        return
    }

    // tokenize() already yields a one-element list when there is no underscore.
    def props = prop.tokenize('_')*.trim()

    // The structure's bounds and count are read from the first property's attribute.
    def attribute = "${corpus_name}.${struct}_${props[0]}"
    def structCount = corpusEngine.attributeSize(attribute)

    // Keep only the structures lying inside the enclosing span. The half-open range
    // is empty when there is no structure, whereas 0..-1 would be a reverse range
    // querying the invalid indices 0 and -1.
    def occs = (0..<structCount).findResults { idx ->
        def bounds = corpusEngine.struc2Cpos(attribute, idx as int)
        def curr_start = bounds[0]
        def curr_end = bounds[1]
        if (curr_start >= start && curr_end <= end) {
            def value = props.collect { curr_prop ->
                corpusEngine.struc2Str("${corpus_name}.${struct}_${curr_prop}", [idx] as int[])[0]
            }.join('_')
            [curr_start, curr_end, value]
        } else {
            null
        }
    }

    // Vmax == 0 means "follow every occurrence".
    def shown = (globalVmax == 0) ? occs : occs.take(globalVmax)

    shown.each { occ ->
        def (childStart, childEnd, value) = occ
        // No leading tab while the accumulated line is still empty.
        def line = (output.length() > 0) ? output + "\t" + value : value
        printPropValues(childStart, childEnd, struct_prop.tail(), line)
    }
}

// Start from the span [0, corpus_size] with all specifications and an empty line.
printPropValues(0, corpus_size, struct_props, "")
111

  
112

  

Formats disponibles : Unified diff