|
1 |
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
|
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
|
|
3 |
// @author mdecorde
|
|
4 |
// STANDARD DECLARATIONS
|
|
5 |
package org.txm.macro.tiger.exploit
|
|
6 |
|
|
7 |
import groovy.transform.Field
|
|
8 |
|
|
9 |
import org.txm.searchengine.core.SearchEnginesManager
|
|
10 |
import org.txm.searchengine.cqp.corpus.*
|
|
11 |
import org.txm.searchengine.ts.TIGERSearchEngine
|
|
12 |
import org.txm.utils.TableReader
|
|
13 |
|
|
14 |
// Simple class name of this script, used as a prefix in the console message below.
def scriptName = this.class.getSimpleName()

// Flatten the Corpus view selection into a list of corpora:
// a CQPCorpus is kept as is, a Partition contributes each of its parts,
// anything else is ignored.
def selection = []
for (item in corpusViewSelections) {
	if (item instanceof CQPCorpus) {
		selection.add(item)
	} else if (item instanceof Partition) {
		selection.addAll(item.getParts())
	}
}

// Nothing usable selected: report it and stop the macro.
if (selection.isEmpty()) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}

// Call compute(false) on every selected corpus before it is queried.
for (selected in selection) {
	selected.compute(false)
}
|
|
28 |
|
|
29 |
// Macro parameters. Each @Field/@Option pair declares a script-level field;
// the script is then handed to ParametersDialog.open(this) below.
// NOTE(review): neither `Option` nor `ParametersDialog` appears in the imports visible
// in this file — confirm they are imported or supplied by the macro runner.

// Table file providing the 'mesure', 'valeur' and 'requête' columns read later in the script.
@Field @Option(name="query_table_file", usage="Table file with 'mesure', 'valeur' and 'requête' columns", widget="FileOpen", required=true, def="queries.ods")
def query_table_file

// When true the report prints submatchSize() of each result, otherwise size().
@Field @Option(name="count_subgraph", usage="Count sub-matches instead of matches", widget="Boolean", required=true, def="true")
def count_subgraph

// NOTE(review): the five parameters below are declared but not read anywhere in the
// visible part of this script — confirm whether they are still needed.
@Field @Option(name="sujet_value", usage="", widget="String", required=true, def=".*subj.*")
def sujet_value
@Field @Option(name="object_value", usage="", widget="String", required=true, def=".*obj.*")
def object_value
@Field @Option(name="proposition_type", usage="", widget="String", required=true, def=".*")
def proposition_type
@Field @Option(name="proposition_subordonee", usage="", widget="StringArray", metaVar="Sans Seulement Avec", required=true, def="Sans")
def proposition_subordonee
@Field @Option(name="coordinations", usage="", widget="StringArray", metaVar="Sans Seulement Avec", required=true, def="Sans")
def coordinations

// Fix: the field needs a declaration (`def`) for the annotations to attach to.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF ON ALL REALLY ALL", required=true, def="OFF")
def debug

// Stop the macro when ParametersDialog.open() returns a false value.
if (!ParametersDialog.open(this)) return

// Replace the debug label by a numeric level (OFF=0, ON=1, ALL=2, REALLY ALL=3).
// Any other value is left untouched.
if (debug == "OFF") {
	debug = 0
} else if (debug == "ON") {
	debug = 1
} else if (debug == "ALL") {
	debug = 2
} else if (debug == "REALLY ALL") {
	debug = 3
}
|
|
48 |
|
|
49 |
// TIGERSearch engine used below to check indexes and run the queries.
TIGERSearchEngine tse = SearchEnginesManager.getTIGERSearchEngine()

// Common part of every query: declares the #pivot, #clause, #obj, #suj,
// #objhead and #sujhead nodes that the word-order constraints refer to.
String based_query = """#pivot:[pos="VERB"] & #clause:[cat="root" & type="VFin"] & #clause >L #pivot & #clause >D #obj:[cat=("obj"|"ccomp"|"obj\\:advneg"|"obj\\:advmod")] & #clause >D #suj:[cat=("nsubj"|"csubj")] & #obj >L #objhead:[] & #suj >L #sujhead:[]"""
println "based_query=$based_query"

// One precedence constraint per ordering of subject head (S), pivot verb (V)
// and object head (O). The map literal keeps insertion order, which fixes
// the order of the result columns.
def SVO_queries = [
	"SVO": "& #sujhead .* #pivot & #pivot .* #objhead",
	"SOV": "& #sujhead .* #objhead & #objhead .* #pivot",
	"OSV": "& #objhead .* #sujhead & #sujhead .* #pivot",
	"OVS": "& #objhead .* #pivot & #pivot .* #sujhead",
	"VSO": "& #pivot .* #sujhead & #sujhead .* #objhead",
	"VOS": "& #pivot .* #objhead & #objhead .* #sujhead"
]
|
|
60 |
|
|
61 |
// Load the test queries from the table file: one [mesure, valeur, requête] triple per record.
// NOTE(review): the reader is never explicitly released here — confirm whether TableReader needs closing.
def test_queries = []
def requiredColumns = ["mesure", "valeur", "requête"]

TableReader reader = new TableReader(query_table_file)
reader.readHeaders()
def headers = reader.getHeaders()

// All three columns must be present, otherwise the macro stops.
def allColumnsPresent = requiredColumns.every { headers.contains(it) }
if (!allColumnsPresent) {
	println "Error: 'mesure', 'valeur', 'requête' columns not found"
	return
}

while (reader.readRecord()) {
	def row = requiredColumns.collect { reader.get(it) }
	test_queries << row
}
|
|
72 |
|
|
73 |
// Report header: one column per word order, in the iteration order of SVO_queries.
println "mesure valeur SVO SOV OSV OVS VSO VOS"

// Fourth argument handed to processQuery() for every query.
// NOTE(review): presumably a cap on the number of matches — confirm against the TIGERSearch manager API.
def queryLimit = 9999999

// Returned value: for each processed corpus, the list of report rows, each row being
// [mesure: ..., valeur: ..., counts: [SVO: n, SOV: n, ...]].
// Corpora skipped for lack of TIGERSearch indexes have no entry.
def results = new LinkedHashMap()

for (def corpus : selection) {

	if (!tse.hasIndexes(corpus)) {
		println "Warning: skipping $corpus: no TIGERSearch indexes found."
		continue
	}

	def tcorpus = tse.getTSCorpus(corpus)
	// Two values, passed to processQuery() as its 2nd and 3rd arguments.
	def based_sentences_min_max = tse.getSentMinMax(corpus)

	def corpusRows = []
	results.put(corpus, corpusRows)

	for (def test : test_queries) {
		def test_query = test[2]
		def counts = new LinkedHashMap()

		print test[0]+"\t"+test[1]

		for (def k : SVO_queries.keySet()) {
			// Full query = common base + word-order constraint + constraint from the table row.
			def svo_query = based_query+"\n"+SVO_queries[k]+"\n"+test_query

			def test_result = tcorpus.manager.processQuery(svo_query, based_sentences_min_max[0], based_sentences_min_max[1], queryLimit)
			def count = count_subgraph ? test_result.submatchSize() : test_result.size()

			// Keep the count so that it is returned as well as printed.
			counts[k] = count
			print " "+count
		}
		println ""

		corpusRows << [mesure: test[0], valeur: test[1], counts: counts]
	}
}

println ""

println "Done."

return results
|