Révision 2335

tmp/org.txm.tigersearch.rcp/groovy/org/txm/macro/tiger/exploit/TIGERSVOSummaryMacro.groovy (revision 2335)
1
// Copyright © 2019 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// STANDARD DECLARATIONS
5
package org.txm.macro.tiger.exploit
6

  
7
import groovy.transform.Field
8

  
9
import org.txm.searchengine.core.SearchEnginesManager
10
import org.txm.searchengine.cqp.corpus.*
11
import org.txm.searchengine.ts.TIGERSearchEngine
12
import org.txm.utils.TableReader
13

  
14
// Simple class name of this script; only used to prefix the message printed when nothing usable is selected.
def scriptName = this.class.getSimpleName()
15

  
16
// Build the list of corpora to process from the Corpus view selection:
// a CQPCorpus is kept as is, a Partition is replaced by its parts, any other kind of object is ignored.
// NOTE(review): corpusViewSelections is not defined in this script — presumably provided by the macro runner's binding; confirm.
def selection = []
17
for (def s : corpusViewSelections) {
18
	if (s instanceof CQPCorpus) selection << s
19
	else if (s instanceof Partition) selection.addAll(s.getParts())
20
}
21

  
22
// Nothing to work on: tell the user what was selected and stop the macro with a false result.
if (selection.size() == 0) {
23
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
24
	return false
25
} else {
26
	// compute(false) is called on every selected corpus before any query is run.
	// NOTE(review): presumably this makes sure each corpus is built/ready; confirm the meaning of the boolean argument.
	for (def c : selection) c.compute(false)
27
}
28

  
29
// Macro parameters. Each @Field @Option pair declares a script-level field that ParametersDialog.open(this) is given below.

// Table file later read with TableReader; it must provide the 'mesure', 'valeur' and 'requête' columns checked further down.
@Field @Option(name="query_table_file", usage="A Full TIGERSearch query", widget="FileOpen", required=true, def="queries.ods")
30
		def query_table_file
31
// When true the results table prints test_result.submatchSize(), otherwise test_result.size().
// NOTE(review): the usage text is identical to the one of query_table_file — looks copy-pasted.
@Field @Option(name="count_subgraph", usage="A Full TIGERSearch query", widget="Boolean", required=true, def="true")
32
		def count_subgraph
33
// NOTE(review): sujet_value, object_value, proposition_type, proposition_subordonee and coordinations
// are declared here but are not referenced anywhere else in the visible script.
@Field @Option(name="sujet_value", usage="", widget="String", required=true, def=".*subj.*")
34
		def sujet_value
35
@Field @Option(name="object_value", usage="", widget="String", required=true, def=".*obj.*")
36
		def object_value
37
@Field @Option(name="proposition_type", usage="", widget="String", required=true, def=".*")
38
		def proposition_type
39
@Field @Option(name="proposition_subordonee", usage="", widget="StringArray", metaVar="Sans	Seulement	Avec", required=true, def="Sans")
40
		def proposition_subordonee
41
@Field @Option(name="coordinations", usage="", widget="StringArray", metaVar="Sans	Seulement	Avec", required=true, def="Sans")
42
		def coordinations
43
// Debug verbosity chosen as a label; converted to a number right after the dialog.
// NOTE(review): unlike the other parameters this one is declared without 'def'.
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
44
		debug
45

  
46
// Stop the macro when ParametersDialog.open returns false (presumably the dialog was cancelled — confirm).
if (!ParametersDialog.open(this)) return
47
	// Map the debug label to a numeric level: OFF=0, ON=1, ALL=2, REALLY ALL=3. Any other value is left unchanged.
	if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
48

  
49
// TIGERSearch engine used below to check indexes, get the TIGERSearch corpus and the sentence range.
TIGERSearchEngine tse = SearchEnginesManager.getTIGERSearchEngine()
50

  
51
// Common part of every query. It names the nodes reused by the ordering constraints and by the table queries:
// #pivot (a pos="VERB" node), #clause, #obj, #suj, and #objhead / #sujhead reached from #obj / #suj through a ">L" edge.
String based_query = """#pivot:[pos="VERB"] & #clause:[cat="root" & type="VFin"] & #clause >L #pivot & #clause >D #obj:[cat=("obj"|"ccomp"|"obj\\:advneg"|"obj\\:advmod")]  & #clause >D #suj:[cat=("nsubj"|"csubj")]  & #obj    >L #objhead:[]  & #suj    >L #sujhead:[]"""
52
println "based_query=$based_query"
53
// One query suffix per ordering of subject head (S), verb pivot (V) and object head (O).
// Each suffix chains two ".*" relations between #sujhead, #pivot and #objhead in the order given by its key.
// A LinkedHashMap is used so that iteration follows insertion order, which is the column order of the printed header.
def SVO_queries = new LinkedHashMap<String, String>()
54
SVO_queries["SVO"] = "& #sujhead .* #pivot   & #pivot   .* #objhead"
55
SVO_queries["SOV"] = "& #sujhead .* #objhead & #objhead .* #pivot"
56
SVO_queries["OSV"] = "& #objhead .* #sujhead & #sujhead .* #pivot"
57
SVO_queries["OVS"] = "& #objhead .* #pivot   & #pivot   .* #sujhead"
58
SVO_queries["VSO"] = "& #pivot   .* #sujhead & #sujhead .* #objhead"
59
SVO_queries["VOS"] = "& #pivot   .* #objhead & #objhead .* #sujhead"
60

  
61
// Rows read from query_table_file, each stored as a [mesure, valeur, requête] triple.
def test_queries = []
62
TableReader reader = new TableReader(query_table_file)
63
reader.readHeaders()
64
def headers = reader.getHeaders()
65
// The three columns below are mandatory; without them the macro prints an error and stops.
if (!(headers.contains("mesure") && headers.contains("valeur") && headers.contains("requête"))) { // required columns: mesure, valeur, requête
66
	println "Error: 'mesure', 'valeur', 'requête' columns not found"
67
	return
68
}
69
// Read every remaining record of the table.
// NOTE(review): no close() call on the reader is visible in this script; confirm whether TableReader needs one.
while (reader.readRecord()) {
70
	test_queries << [reader.get("mesure"), reader.get("valeur"), reader.get("requête")]
71
}
72

  
73
// Header of the tab-separated results table; the six count columns follow the insertion order of SVO_queries.
println "mesure	valeur	SVO	SOV	OSV	OVS	VSO	VOS"
74

  
75
// NOTE(review): nothing in the visible script ever writes to this map, so the macro returns it empty.
def results = new LinkedHashMap()
76
// For each selected corpus, print one table row per test query.
// NOTE(review): rows do not mention the corpus, so with several corpora the rows of each corpus simply follow one another.
for (def corpus : selection) {
77
	
78
	def root = corpus.getRootCorpusBuild();
79
	// NOTE(review): buildDirectory is not used afterwards in the visible code.
	File buildDirectory = new File(root.getProjectDirectory(), "tiger");
80
	
81
	// Corpora without TIGERSearch indexes are reported and skipped.
	if (!tse.hasIndexes(corpus)) {
82
		println "Warning: skipping $corpus: no TIGERSearch indexes found."
83
		continue;
84
	}
85
	
86
	def tcorpus = tse.getTSCorpus(corpus);
87
	// Two-element array: first and last sentence numbers, passed to processQuery below.
	def based_sentences_min_max = tse.getSentMinMax(corpus);
88
	
89
	for (def test : test_queries) {
90
		// test is a [mesure, valeur, requête] triple; the third element is the query fragment appended to each SVO query.
		def test_query = test[2]
91
		
92
		// Row prefix: the 'mesure' and 'valeur' cells.
		print test[0]+"\t"+test[1]
93
		
94
		for (def k : SVO_queries.keySet()) {
95
			// Full query = common part + ordering constraint + table query, joined with newlines.
			def svo_query = based_query+"\n"+SVO_queries[k]+"\n"+test_query
96
				
97
			// NOTE(review): the last argument (9999999) is presumably a cap on the number of results; confirm against processQuery.
			def test_result = tcorpus.manager.processQuery(svo_query, based_sentences_min_max[0], based_sentences_min_max[1], 9999999);
98
			// count_subgraph selects which count is printed for this cell: submatchSize() or size().
			if (count_subgraph) {
99
				print "	"+test_result.submatchSize()
100
			} else {
101
				print "	"+test_result.size()
102
			}
103
		}
104
		// End of the row.
		println ""
105

  
106
		//			for (def svo_match : svo_result.getMatches()) {
107
		//
108
		//			}
109
	}
110
}
111

  
112
println ""
113

  
114
println "Done."
115

  
116
return results
tmp/org.txm.tigersearch.rcp/groovy/org/txm/scripts/importer/tigersearch/TSImporter.groovy (revision 2335)
126 126
		if (!doToXMLTXMStep()) return;
127 127
		if (!doInjectMetadataStep()) return;
128 128

  
129
		module.orderedFiles = new ArrayList<File>();
130
		for (def f : xmlFiles) module.orderedFiles.add(new File(outputDirectory, f.getName()))
131
		
129
		module.orderedFiles = new ArrayList<String>();
130
		for (def f : xmlFiles) {
131
			String id = f.getName()
132
			id = id.substring(0, id.indexOf(".xml"))
133
			module.orderedFiles.add(id)
134
		}
132 135
		isSuccessFul = outputDirectory.listFiles(IOUtils.HIDDENFILE_FILTER).size() > 0
133 136
		
134 137
		String cleanDirectories = project.getCleanAfterBuild();
tmp/org.txm.tigersearch.rcp/groovy/org/txm/scripts/importer/tigersearch/TSImport.groovy (revision 2335)
99 99
		}
100 100
	}
101 101
	
102
	ArrayList<String> orderedFiles= null;
102
	ArrayList<String> orderedFiles = null;
103 103
	// Returns the orderedFiles field as the TXM files order.
	// The field is initialised to null, so null is returned until something assigns it.
	protected ArrayList<String> getTXMFilesOrder() {
104 104
		// Groovy returns the value of the last expression, so this is the method's return value.
		orderedFiles;
105 105
	}
tmp/org.txm.tigersearch.rcp/src/org/txm/searchengine/ts/TIGERSearchEngine.java (revision 2335)
85 85
	 * @throws CqiClientException
86 86
	 */
87 87
	public static int[] getSentMinMax(CQPCorpus cqpCorpus) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException {
88
		// Convenience overload: derives the start/end positions from the corpus matches
		// and delegates to getSentMinMax(CQPCorpus, int, int).
		List<org.txm.searchengine.cqp.corpus.query.Match> matches = cqpCorpus.getMatches();
89
		// No match at all: answer {0,0} without delegating.
		if (matches.size() == 0) {
90
			return new int[]{0,0};
91
		}
92

  
93
		// Range spans from the start of the first match to the end of the last match.
		return getSentMinMax(cqpCorpus, matches.get(0).getStart(), matches.get(matches.size()-1).getEnd());
94
	}
95
	
96
	/**
97
	 * 
98
	 * @param cqpCorpus the targeted CQPCorpus 
99
	 * @return the ids of the first and last sentences (from 0 to N, N being the number of sentences). WARNING: this is not the list of sentences in the targeted CQPCorpus, unless the CQPCorpus is contiguous
100
	 * 
101
	 * @throws UnexpectedAnswerException
102
	 * @throws IOException
103
	 * @throws CqiServerError
104
	 * @throws CqiClientException
105
	 */
106
	public static int[] getSentMinMax(CQPCorpus cqpCorpus, int start, int end) throws UnexpectedAnswerException, IOException, CqiServerError, CqiClientException {
88 107
		AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
89 108
		List<org.txm.searchengine.cqp.corpus.query.Match> matches = cqpCorpus.getMatches();
90 109
		if (matches.size() == 0) {
91 110
			return new int[]{0,0};
92 111
		}
93
		int[] cpos = new int[] {matches.get(0).getStart(), matches.get(matches.size()-1).getEnd()};
112
		int[] cpos = new int[] {start, end};
94 113
		int[] structs = CQI.cpos2Struc(cqpCorpus.getStructuralUnit("s").getProperty("n").getQualifiedName(), cpos);
95 114
		if (structs.length == 0) {
96 115
			return new int[]{0,0};

Formats disponibles : Unified diff