Statistics
| Revision:

root / tmp / org.txm.tigersearch.rcp / groovy / org / txm / macro / tiger / exploit / TIGERSVOSummaryMacro.groovy @ 2930

History | View | Annotate | Download (6.4 kB)

1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.tiger.exploit
6

    
7
import groovy.transform.Field
8

    
9
import org.txm.libs.office.WriteODS
10
import org.txm.searchengine.core.SearchEnginesManager
11
import org.txm.searchengine.cqp.corpus.*
12
import org.txm.searchengine.ts.TIGERSearchEngine
13
import org.txm.utils.ConsoleProgressBar
14
import org.txm.utils.TableReader
15

    
16
// Simple class name of this script, used as a prefix in console messages.
def scriptName = this.class.getSimpleName()

// Flatten the Corpus view selection into a list of corpora:
// a CQPCorpus is kept as is, a Partition contributes all of its parts,
// anything else is ignored.
def selection = []
for (def item in corpusViewSelections) {
	if (item instanceof CQPCorpus) {
		selection << item
	} else if (item instanceof Partition) {
		selection.addAll(item.getParts())
	}
}

// Nothing usable selected: report and stop the macro.
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Compute every selected corpus before it is queried.
for (def selected in selection) {
	selected.compute(false)
}
30

    
31
// Macro parameters. Each @Field/@Option pair declares a script-level field;
// the values are presumably filled in by the ParametersDialog.open(this) call
// that follows these declarations.

// Output spreadsheet written by the macro.
@Field @Option(name="new_ods_file", usage="file to create", widget="FileSave", required=true, def="result.ods")
def new_ods_file

// Input table; the script requires 'mesure', 'valeur' and 'requête' columns in it.
@Field @Option(name="query_table_file", usage="table file listing the test queries ('mesure', 'valeur' and 'requête' columns)", widget="FileOpen", required=true, def="queries.ods")
def query_table_file

// When true the result cells hold submatchSize(), otherwise size().
@Field @Option(name="count_sub_matches", usage="count the sub-matches instead of the matches", widget="Boolean", required=true, def="true")
def count_sub_matches

// Node constraint substituted for SUJVALUE in the base query.
@Field @Option(name="sujet_value", usage="", widget="String", required=true, def="[cat=(\"nsubj\"|\"csubj\")]")
def sujet_value

// Node constraint substituted for OBJVALUE in the base query.
@Field @Option(name="object_value", usage="", widget="String", required=true, def="[cat=(\"obj\"|\"ccomp\"|\"obj\\\\:advneg\"|\"obj\\\\:advmod\")]")
def object_value

// Clause type switches; at least one of the three must be true.
@Field @Option(name="clauses_count_main", usage="", widget="Boolean", required=true, def="true")
def clauses_count_main
@Field @Option(name="clauses_count_subordinate", usage="", widget="Boolean", required=true, def="false")
def clauses_count_subordinate
@Field @Option(name="clauses_count_inserted", usage="", widget="Boolean", required=true, def="false")
def clauses_count_inserted

// When true each result column is preceded by a column holding the query that produced it.
@Field @Option(name="print_queries", usage="insert columns with the used queries", widget="Boolean", required=true, def="true")
def print_queries

// Debug level as text (OFF, ON, ALL, REALLY ALL); converted to a number 0..3 after the dialog.
// NOTE(review): the metaVar items are separated by runs of spaces here; confirm the
// separator expected by the StringArray widget (tabs may have been lost in transit).
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF        ON        ALL        REALLY ALL", required=true, def="OFF")
def debug
51

    
52
// Open the parameters dialog; stop the macro when the call returns a false value.
if (!ParametersDialog.open(this)) return

// Map the textual debug level onto a number; any other value is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

// The query table must be set and must exist.
if (!query_table_file?.exists()) {
	println "Error: query_table_file not set or wrong path: "+query_table_file
	return
}

// At least one of the three clause switches has to be enabled.
if (!(clauses_count_main || clauses_count_subordinate || clauses_count_inserted)) {
	println "Error: at least one clause must be selected"
	return
}
65

    
66
// Query template shared by all word-order queries. CLAUSEVALUE, OBJVALUE and
// SUJVALUE are placeholders substituted below. The trailing spaces on the
// first and third lines are part of the original template and are kept.
String based_query = [
	'#pivot:[pos="VERB"] ',
	'& #clause:CLAUSEVALUE',
	'& #clause >L #pivot ',
	'& #clause >D #obj:OBJVALUE',
	'& #clause >D #suj:SUJVALUE',
	'& #obj    >L #objhead:[]  & #suj    >L #sujhead:[]'
].join("\n")

// Only one clause constraint is used: inserted clauses win over subordinate
// ones, which win over main clauses (the default).
// NOTE(review): enabling several clause switches does not combine them - confirm this is intended.
String clause_value = clauses_count_inserted ? '[cat="Insrt" & type="VFin"]' :
		(clauses_count_subordinate ? '[cat!=("root"|"Insrt") & type="VFin"]' :
		'[cat="root" & type="VFin"]')

// replaceAll() treats backslashes and '$' in the replacement text specially;
// presumably this is why the default object_value doubles its backslashes.
based_query = based_query
		.replaceAll("CLAUSEVALUE", clause_value)
		.replaceAll("OBJVALUE", object_value)
		.replaceAll("SUJVALUE", sujet_value)

println "based_query=" + based_query
84
// One query suffix per relative order of subject head, verb (pivot) and
// object head. Insertion order is kept: it is also the column order of the output.
def SVO_queries = new LinkedHashMap<String, String>([
	SVO: "& #sujhead .* #pivot   & #pivot   .* #objhead",
	SOV: "& #sujhead .* #objhead & #objhead .* #pivot",
	OSV: "& #objhead .* #sujhead & #sujhead .* #pivot",
	OVS: "& #objhead .* #pivot   & #pivot   .* #sujhead",
	VSO: "& #pivot   .* #sujhead & #sujhead .* #objhead",
	VOS: "& #pivot   .* #objhead & #objhead .* #sujhead"
])

// Two leading columns, then one column per order (two when the queries are printed too).
int columnsPerOrder = print_queries ? 2 : 1
int ncols = 2 + SVO_queries.size() * columnsPerOrder
92
        
93
// Test queries read from the table: one [mesure, valeur, requête] triple per usable row.
def test_queries = []
TableReader reader = new TableReader(query_table_file)
reader.readHeaders()

WriteODS writer = new WriteODS(new_ods_file)

// The three columns below are mandatory.
def headers = reader.getHeaders()
def requiredColumns = ["mesure", "valeur", "requête"]
if (!requiredColumns.every { headers.contains(it) }) {
	println "Error: 'mesure', 'valeur', 'requête' columns not found"
	return
}

// Keep only the rows where all three cells are non-empty.
while (reader.readRecord()) {
	if (reader.get("mesure").length() == 0) continue
	if (reader.get("valeur").length() == 0) continue
	if (reader.get("requête").length() == 0) continue

	test_queries << [
		reader.get("mesure"),
		reader.get("valeur"),
		reader.get("requête")
	]
}

if (test_queries.isEmpty()) {
	println "Error: no queries found in table file: "+query_table_file
	return null;
}
}
118

    
119
// One progress tick is planned per (corpus, word order, test query) combination.
int queriesPerCorpus = test_queries.size() * SVO_queries.size()
ConsoleProgressBar cpb = new ConsoleProgressBar(selection.size() * queriesPerCorpus)

// Summary of the work to do.
println "Resolving ${queriesPerCorpus} queries for ${selection.size()} corpus."
println "selection=" + selection
println "test_queries=" + test_queries
println "SVO_queries=" + SVO_queries
124

    
125
// Run every (test query x word order) combination on each selected corpus and
// write one sheet per corpus: one row per test query, one count per word order.
TIGERSearchEngine tse = SearchEnginesManager.getTIGERSearchEngine()

// NOTE(review): nothing is ever stored in this map, so the macro returns an empty map.
def results = new LinkedHashMap()

for (def corpus : selection) {

	// Check the indexes BEFORE creating the sheet, so that a skipped corpus
	// does not leave an empty sheet in the output file.
	if (!tse.hasIndexes(corpus)) {
		println "Warning: skipping $corpus: no TIGERSearch indexes found."
		continue;
	}

	writer.newTable(corpus.getName())
	writer.declareRowsAndColumns(0, ncols)

	// Header row: mesure, valeur, then per word order an optional query column and the count column.
	def cols = ["mesure", "valeur"]
	for (String col : SVO_queries.keySet()) {
		if (print_queries) cols << "q"+col
		cols << col
	}
	writer.writeLine(cols)

	def tcorpus = tse.getTSCorpus(corpus);
	// Two-element value; its items are passed as the 2nd and 3rd processQuery arguments.
	def based_sentences_min_max = tse.getSentMinMax(corpus);

	for (def test : test_queries) {
		def test_query = test[2]
		def lineToWrite = []
		lineToWrite << test[0] // mesure
		lineToWrite << test[1] // valeur

		for (def k : SVO_queries.keySet()) {
			// Full query = base query + word-order constraint + test query, one per line.
			def svo_query = based_query+"\n"+SVO_queries[k]+"\n"+test_query

			if (debug > 0) println svo_query

			cpb.tick()

			def test_result = tcorpus.manager.processQuery(svo_query, based_sentences_min_max[0], based_sentences_min_max[1], 9999999);
			if (print_queries) lineToWrite << svo_query
			if (count_sub_matches) {
				lineToWrite << test_result.submatchSize()
			} else {
				lineToWrite << test_result.size()
			}
		}

		writer.writeLine(lineToWrite)
	}
}

writer.save()
cpb.done()

return results