Statistics
| Revision:

root / tmp / org.txm.tigersearch.rcp / groovy / org / txm / macro / tiger / exploit / TIGERSVOSummaryMacro.groovy @ 2346

History | View | Annotate | Download (5.7 kB)

1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.tiger.exploit
6

    
7
import groovy.transform.Field
8

    
9
import org.txm.libs.office.WriteODS
10
import org.txm.searchengine.core.SearchEnginesManager
11
import org.txm.searchengine.cqp.corpus.*
12
import org.txm.searchengine.ts.TIGERSearchEngine
13
import org.txm.utils.ConsoleProgressBar
14
import org.txm.utils.TableReader
15

    
16
// Simple class name of this script, used to prefix the console message below
def scriptName = this.class.getSimpleName()

// Flatten the Corpus view selection into a list of corpora:
// a corpus is kept as is, a partition contributes all of its parts,
// anything else is ignored.
def selection = []
for (def selected : corpusViewSelections) {
	switch (selected) {
		case CQPCorpus:
			selection.add(selected)
			break
		case Partition:
			selection.addAll(selected.getParts())
			break
	}
}

// Nothing usable selected: tell the user and stop the macro
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every selected corpus before it is queried
for (def selected_corpus : selection) {
	selected_corpus.compute(false)
}
30

    
31
// ---- Macro parameters ----
// Every variable below is a script field (@Field) described by an @Option
// annotation; they are presumably filled in by ParametersDialog.open(this),
// which this script calls right after these declarations.

// Spreadsheet file that receives the result tables (one table per corpus)
@Field @Option(name="new_ods_file", usage="file to create", widget="FileSave", required=true, def="result.ods")
		def new_ods_file
// Table file listing the test queries; its 'mesure', 'valeur' and 'requête' columns are read
@Field @Option(name="query_table_file", usage="table file listing the test queries (columns: mesure, valeur, requête)", widget="FileOpen", required=true, def="queries.ods")
		def query_table_file
// When true the cells hold submatchSize() of the query result, otherwise size()
@Field @Option(name="count_sub_matches", usage="count the sub-matches instead of the matches", widget="Boolean", required=true, def="true")
		def count_sub_matches
// Node constraint substituted for SUJVALUE in the base query (the #suj node)
@Field @Option(name="sujet_value", usage="", widget="String", required=true, def="[cat=(\"nsubj\"|\"csubj\")]")
		def sujet_value
// Node constraint substituted for OBJVALUE in the base query (the #obj node).
// The substitution is done with String.replaceAll, which consumes one level of
// backslashes in the replacement text; the doubled backslashes of the default
// value presumably compensate for that.
@Field @Option(name="object_value", usage="", widget="String", required=true, def="[cat=(\"obj\"|\"ccomp\"|\"obj\\\\:advneg\"|\"obj\\\\:advmod\")]")
		def object_value
// Clause types to search: at least one of the three must be selected
@Field @Option(name="clauses_count_main", usage="", widget="Boolean", required=true, def="true")
		def clauses_count_main
@Field @Option(name="clauses_count_subordinate", usage="", widget="Boolean", required=true, def="false")
		def clauses_count_subordinate
@Field @Option(name="clauses_count_inserted", usage="", widget="Boolean", required=true, def="false")
		def clauses_count_inserted
// When true each result column is preceded by a column holding the query that produced it
@Field @Option(name="print_queries", usage="insert columns with the used queries", widget="Boolean", required=true, def="true")
		def print_queries
// Debug level name, converted to a number after the dialog is closed.
// NOTE(review): the choices in metaVar are separated by runs of spaces here;
// confirm this is the separator the StringArray widget expects.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
		def debug
51

    
52
// Show the parameters dialog; stop the macro when open() returns a false value
if (!ParametersDialog.open(this)) return

// Convert the debug level name into a number (OFF=0, ON=1, ALL=2, REALLY ALL=3).
// A value that is already numeric is kept; any unknown or missing name falls
// back to 0 so that the later numeric test (debug > 0) cannot fail on a String.
if (!(debug instanceof Number)) {
	def debug_levels = ["OFF": 0, "ON": 1, "ALL": 2, "REALLY ALL": 3]
	Integer debug_level = debug_levels[debug?.toString()]
	debug = (debug_level == null) ? 0 : debug_level
}

// The query needs a clause type: refuse to run when none of the three is selected
if (!(clauses_count_main || clauses_count_subordinate || clauses_count_inserted)) {
	println "Error: at least one clause must be selected"
	return
}
60

    
61
// Query template shared by all word-order queries. It binds a verb (#pivot),
// the clause node (#clause) whose >L relation reaches the verb, an object node
// and a subject node under the clause (>D), and the >L targets of the object
// and subject (#objhead, #sujhead). CLAUSEVALUE, OBJVALUE and SUJVALUE are
// placeholders substituted below.
String based_query = """#pivot:[pos="VERB"] 
& #clause:CLAUSEVALUE
& #clause >L #pivot 
& #clause >D #obj:OBJVALUE
& #clause >D #suj:SUJVALUE
& #obj    >L #objhead:[]  & #suj    >L #sujhead:[]"""

// Build the clause constraint from ALL the selected clause types, so that
// ticking several of them no longer silently keeps only one:
//   main        -> cat="root"
//   inserted    -> cat="Insrt"
//   subordinate -> any category other than "root" and "Insrt"
// Each single selection produces exactly the constraint used before.
def kept_cats = []     // categories explicitly requested
def dropped_cats = []  // categories explicitly not requested
(clauses_count_main ? kept_cats : dropped_cats) << "\"root\""
(clauses_count_inserted ? kept_cats : dropped_cats) << "\"Insrt\""

// Formats one value as is, several values as a ("a"|"b") alternative
def alternatives = { List values ->
	values.size() == 1 ? values[0] : "(" + values.join("|") + ")"
}

String cat_constraint
if (clauses_count_subordinate) {
	// "Subordinate" is defined by exclusion, so the combination is expressed by
	// negating the categories that were NOT requested; with nothing to exclude
	// only the type constraint remains.
	cat_constraint = dropped_cats.isEmpty() ? "" : "cat!=" + alternatives(dropped_cats) + " & "
} else {
	// Nothing selected at all: fall back to the main clause, as before
	if (kept_cats.isEmpty()) kept_cats << "\"root\""
	cat_constraint = "cat=" + alternatives(kept_cats) + " & "
}
String clause_value = "[" + cat_constraint + "type=\"VFin\"]"

// NOTE: replaceAll interprets backslashes and dollar signs in the replacement
// text. It is kept on purpose: the default object_value doubles its
// backslashes, and a literal replacement would change the generated query.
based_query = based_query.replaceAll("CLAUSEVALUE", clause_value)
based_query = based_query.replaceAll("OBJVALUE", object_value)
based_query = based_query.replaceAll("SUJVALUE", sujet_value)

println "based_query=$based_query"
79
// Ordering constraints appended to the base query, one per word order of
// subject head (#sujhead), verb (#pivot) and object head (#objhead).
// The map literal keeps the insertion order, which is the column order of the output.
def SVO_queries = [
	"SVO": "& #sujhead .* #pivot   & #pivot   .* #objhead",
	"SOV": "& #sujhead .* #objhead & #objhead .* #pivot",
	"OSV": "& #objhead .* #sujhead & #sujhead .* #pivot",
	"OVS": "& #objhead .* #pivot   & #pivot   .* #sujhead",
	"VSO": "& #pivot   .* #sujhead & #sujhead .* #objhead",
	"VOS": "& #pivot   .* #objhead & #objhead .* #sujhead"
]

// Table width: the two leading columns, then for each word order one count
// column, plus one query column when print_queries is set.
int columns_per_order = print_queries ? 2 : 1
int ncols = 2 + (SVO_queries.size() * columns_per_order)
87
        
88
// Columns that the query table must provide, in the order they are stored
// in each test_queries entry: [mesure, valeur, requête]
def required_columns = ["mesure", "valeur", "requête"]

def test_queries = []
TableReader reader = new TableReader(query_table_file)
reader.readHeaders()

// Output spreadsheet, filled corpus by corpus further down
WriteODS writer = new WriteODS(new_ods_file);

// Stop when one of the required columns is absent from the table headers
def headers = reader.getHeaders()
boolean all_columns_found = required_columns.every { headers.contains(it) }
if (!all_columns_found) {
	println "Error: 'mesure', 'valeur', 'requête' columns not found"
	return
}

// One entry per table record, holding the three required cells
while (reader.readRecord()) {
	test_queries.add(required_columns.collect { reader.get(it) })
}
106

    
107
// One progress tick per selected corpus
ConsoleProgressBar cpb = new ConsoleProgressBar(selection.size())

TIGERSearchEngine tse = SearchEnginesManager.getTIGERSearchEngine()

// Header row: "mesure", "valeur", then for each word order its count column,
// preceded by a "q"+order column when the queries are printed.
// It does not depend on the corpus, so it is built once.
def cols = ["mesure", "valeur"]
for (String col : SVO_queries.keySet()) {
	if (print_queries) cols << "q"+col
	cols << col
}

// Returned value: corpus name -> list of the data rows written for that corpus
// (header row excluded). Corpora skipped for lack of indexes have no entry.
def results = new LinkedHashMap()
for (def corpus : selection) {
	cpb.tick()

	// The table is created before the index check, so a skipped corpus
	// still gets an (empty) table in the output file.
	writer.newTable(corpus.getName())

	if (!tse.hasIndexes(corpus)) {
		println "Warning: skipping $corpus: no TIGERSearch indexes found."
		continue;
	}

	writer.declareRowsAndColumns(0, ncols)
	writer.writeLine(new ArrayList(cols)) // a copy, so the shared header list is never handed out

	def tcorpus = tse.getTSCorpus(corpus);
	// Two values passed as 2nd and 3rd arguments of processQuery;
	// presumably the first and last sentence of the corpus - TODO confirm
	def based_sentences_min_max = tse.getSentMinMax(corpus);

	def rows = []
	for (def test : test_queries) {
		// test = [mesure, valeur, requête]
		def test_query = test[2]
		def lineToWrite = []
		lineToWrite << test[0]
		lineToWrite << test[1]

		for (def order : SVO_queries.entrySet()) {
			// Full query = base query + word-order constraint + test constraint
			def svo_query = based_query+"\n"+order.getValue()+"\n"+test_query

			if (debug > 0) println svo_query

			// NOTE(review): the last argument looks like an upper bound on the
			// number of matches - confirm against processQuery
			def test_result = tcorpus.manager.processQuery(svo_query, based_sentences_min_max[0], based_sentences_min_max[1], 9999999);
			if (print_queries) lineToWrite << svo_query
			if (count_sub_matches) {
				lineToWrite << test_result.submatchSize()
			} else {
				lineToWrite << test_result.size()
			}
		}

		writer.writeLine(lineToWrite)
		rows << lineToWrite
	}
	results[corpus.getName()] = rows
}

writer.save()
cpb.done()

return results