root / tmp / org.txm.tigersearch.rcp / groovy / org / txm / macro / tiger / exploit / TIGERSVOSummaryMacro.groovy @ 2346
History | View | Annotate | Download (5.7 kB)
1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
|
---|---|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
|
3 |
// @author mdecorde
|
4 |
// STANDARD DECLARATIONS
|
5 |
package org.txm.macro.tiger.exploit
|
6 |
|
7 |
import groovy.transform.Field |
8 |
|
9 |
import org.txm.libs.office.WriteODS |
10 |
import org.txm.searchengine.core.SearchEnginesManager |
11 |
import org.txm.searchengine.cqp.corpus.* |
12 |
import org.txm.searchengine.ts.TIGERSearchEngine |
13 |
import org.txm.utils.ConsoleProgressBar |
14 |
import org.txm.utils.TableReader |
15 |
|
16 |
// Simple class name of this script, used as a prefix in console messages.
def scriptName = this.class.getSimpleName()

// Flatten the Corpus view selection into a list of corpora:
// a CQPCorpus is kept as is, a Partition contributes all of its parts,
// any other kind of selected object is ignored.
def selection = []
for (def selected : corpusViewSelections) {
	switch (selected) {
		case CQPCorpus:
			selection.add(selected)
			break
		case Partition:
			selection.addAll(selected.getParts())
			break
	}
}

// Nothing usable selected: report it and stop the macro.
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every selected corpus before querying it.
for (def corpus : selection) {
	corpus.compute(false)
}
30 |
|
31 |
// Output spreadsheet written by this macro.
@Field @Option(name="new_ods_file", usage="file to create", widget="FileSave", required=true, def="result.ods")
def new_ods_file

// Input table listing the test queries; the script requires the
// 'mesure', 'valeur' and 'requête' columns to be present in its header.
// (The usage text previously read "A Full TIGERSearch query", which did not describe a file.)
@Field @Option(name="query_table_file", usage="table file with the 'mesure', 'valeur' and 'requête' columns", widget="FileOpen", required=true, def="queries.ods")
def query_table_file

// When true the script reports submatchSize() of each query result, otherwise size().
// (The usage text previously read "A Full TIGERSearch query", which did not describe this flag.)
@Field @Option(name="count_sub_matches", usage="count the sub-matches instead of the matches", widget="Boolean", required=true, def="true")
def count_sub_matches
|
37 |
// Node description substituted for SUJVALUE in the base query (the subject node).
@Field
@Option(
	name="sujet_value",
	usage="",
	widget="String",
	required=true,
	def="[cat=(\"nsubj\"|\"csubj\")]")
def sujet_value

// Node description substituted for OBJVALUE in the base query (the object node).
// NOTE(review): the backslashes are doubled in the default value; this looks
// intended to survive the replaceAll() substitution done later - confirm.
@Field
@Option(
	name="object_value",
	usage="",
	widget="String",
	required=true,
	def="[cat=(\"obj\"|\"ccomp\"|\"obj\\\\:advneg\"|\"obj\\\\:advmod\")]")
def object_value

// Clause types to take into account: main, subordinate and inserted clauses.
@Field
@Option(
	name="clauses_count_main",
	usage="",
	widget="Boolean",
	required=true,
	def="true")
def clauses_count_main

@Field
@Option(
	name="clauses_count_subordinate",
	usage="",
	widget="Boolean",
	required=true,
	def="false")
def clauses_count_subordinate

@Field
@Option(
	name="clauses_count_inserted",
	usage="",
	widget="Boolean",
	required=true,
	def="false")
def clauses_count_inserted

// When true, each result column is preceded by a column holding the query used.
@Field
@Option(
	name="print_queries",
	usage="insert columns with the used queries",
	widget="Boolean",
	required=true,
	def="true")
def print_queries

// Verbosity of the console output; converted to a number after the dialog is validated.
@Field
@Option(
	name="debug",
	usage="Show internal variable content",
	widget="StringArray",
	metaVar="OFF ON ALL REALLY ALL",
	required=true,
	def="OFF")
debug
51 |
|
52 |
// Open the parameters dialog; stop the macro when open() does not return a true value.
if (!ParametersDialog.open(this)) {
	return
}

// Turn the textual debug level into a number (OFF=0 ... REALLY ALL=3);
// any other value is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}

// At least one of the three clause types must be enabled.
if (!(clauses_count_main || clauses_count_subordinate || clauses_count_inserted)) {
	println "Error: at least one clause must be selected"
	return
}
60 |
|
61 |
// Skeleton of the TIGERSearch query shared by all the word-order queries:
// a VERB pivot inside a clause that dominates an object node and a subject node,
// plus the #objhead and #sujhead nodes used later to test the relative order.
// CLAUSEVALUE, OBJVALUE and SUJVALUE are placeholders substituted below.
String based_query = """#pivot:[pos="VERB"]
& #clause:CLAUSEVALUE
& #clause >L #pivot
& #clause >D #obj:OBJVALUE
& #clause >D #suj:SUJVALUE
& #obj >L #objhead:[] & #suj >L #sujhead:[]"""

// Build the clause constraint from ALL the selected clause types.
// Main clauses are cat="root", inserted clauses are cat="Insrt" and subordinate
// clauses are every other category. Previously only one type was used (inserted,
// else subordinate, else main), so unchecking "main" or checking several types
// was silently ignored.
def quoted = { String cat -> "\"" + cat + "\"" }
def alternatives = { List cats ->
	cats.size() == 1 ? quoted(cats[0]) : "(" + cats.collect(quoted).join("|") + ")"
}
def kept_cats = []    // named categories the user asked for
def dropped_cats = [] // named categories the user did not ask for
(clauses_count_main ? kept_cats : dropped_cats) << "root"
(clauses_count_inserted ? kept_cats : dropped_cats) << "Insrt"

String cat_constraint
if (!clauses_count_subordinate) {
	// Only named categories are wanted: list them positively.
	// kept_cats is not empty as long as at least one clause type is selected.
	cat_constraint = "cat=" + alternatives(kept_cats)
} else if (dropped_cats.isEmpty()) {
	// Every clause type is wanted: no constraint on the category.
	cat_constraint = null
} else {
	// Subordinate clauses are wanted: exclude the named categories that are not.
	cat_constraint = "cat!=" + alternatives(dropped_cats)
}
String clause_value = (cat_constraint == null) ? "[type=\"VFin\"]" : "[" + cat_constraint + " & type=\"VFin\"]"

// replaceAll() interprets its second argument as a regex replacement string:
// a backslash escapes the following character, so a doubled backslash in
// object_value or sujet_value ends up as a single one in the query.
based_query = based_query.replaceAll("CLAUSEVALUE", clause_value)
based_query = based_query.replaceAll("OBJVALUE", object_value)
based_query = based_query.replaceAll("SUJVALUE", sujet_value)

println "based_query=$based_query"
|
79 |
// Order constraints appended to the base query, one per word order of
// subject head (#sujhead), verb (#pivot) and object head (#objhead).
// A map literal keeps the insertion order, which is also the column order of the output.
def SVO_queries = [
	"SVO": "& #sujhead .* #pivot & #pivot .* #objhead",
	"SOV": "& #sujhead .* #objhead & #objhead .* #pivot",
	"OSV": "& #objhead .* #sujhead & #sujhead .* #pivot",
	"OVS": "& #objhead .* #pivot & #pivot .* #sujhead",
	"VSO": "& #pivot .* #sujhead & #sujhead .* #objhead",
	"VOS": "& #pivot .* #objhead & #objhead .* #sujhead"
]

// Two leading columns ("mesure", "valeur"), then one column per word order,
// doubled when the queries are printed next to the counts.
int columns_per_order = print_queries ? 2 : 1
int ncols = 2 + (SVO_queries.size() * columns_per_order)
87 |
|
88 |
// Test queries read from the query table: one [mesure, valeur, requête] triple per record.
def test_queries = []
TableReader reader = new TableReader(query_table_file)
reader.readHeaders()

WriteODS writer = new WriteODS(new_ods_file);

// The three columns below are mandatory; stop when any of them is missing from the header.
def required_columns = ["mesure", "valeur", "requête"]
def headers = reader.getHeaders()
def column_missing = required_columns.any { !headers.contains(it) }
if (column_missing) {
	println "Error: 'mesure', 'valeur', 'requête' columns not found"
	return
}

// Read every record and keep the values of the three mandatory columns, in that order.
while (reader.readRecord()) {
	test_queries << required_columns.collect { reader.get(it) }
}
106 |
|
107 |
// One progress tick per selected corpus.
ConsoleProgressBar cpb = new ConsoleProgressBar(selection.size())

TIGERSearchEngine tse = SearchEnginesManager.getTIGERSearchEngine()

// Returned value: corpus name -> list of the data rows written for that corpus.
// (The map used to be returned without ever being filled.)
def results = new LinkedHashMap()
for (def corpus : selection) {
	cpb.tick()

	// The table is created before the index check, so a skipped corpus
	// still gets a table (without rows) in the output file.
	writer.newTable(corpus.getName())

	if (!tse.hasIndexes(corpus)) {
		println "Warning: skipping $corpus: no TIGERSearch indexes found."
		continue;
	}

	// Header line: "mesure", "valeur", then for each word order an optional
	// "q<order>" column (the query used) followed by the count column.
	writer.declareRowsAndColumns(0, ncols)
	def cols = ["mesure", "valeur"]
	for (String col : SVO_queries.keySet()) {
		if (print_queries) cols << "q"+col
		cols << col
	}
	writer.writeLine(cols)

	def tcorpus = tse.getTSCorpus(corpus);
	def based_sentences_min_max = tse.getSentMinMax(corpus);

	def corpus_rows = []
	for (def test : test_queries) {
		// test = [mesure, valeur, requête]
		def test_query = test[2]
		def lineToWrite = []
		lineToWrite << test[0]
		lineToWrite << test[1]

		for (def order : SVO_queries.entrySet()) {
			// Full query = base query + word-order constraint + test constraint.
			def svo_query = based_query+"\n"+order.getValue()+"\n"+test_query

			if (debug > 0) println svo_query

			// Run the query between the two bounds returned by getSentMinMax().
			// NOTE(review): 9999999 is presumably a maximum number of matches - confirm.
			def test_result = tcorpus.manager.processQuery(svo_query, based_sentences_min_max[0], based_sentences_min_max[1], 9999999);
			if (print_queries) lineToWrite << svo_query
			if (count_sub_matches) {
				lineToWrite << test_result.submatchSize()
			} else {
				lineToWrite << test_result.size()
			}
		}

		writer.writeLine(lineToWrite)
		corpus_rows << lineToWrite
	}
	results[corpus.getName()] = corpus_rows
}

writer.save()
cpb.done()

return results
|