Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / macro / cqp / ExecCQPMacro.groovy @ 187

History | View | Annotate | Download (6.1 kB)

1
package org.txm.macro.cqp
2
// Copyright © 2014 ENS de Lyon
3
//
4
// Authors:
5
// - Matthieu Decorde
6
// - Serge Heiden
7
//
8
// Licence:
9
// This file is part of the TXM platform.
10
// The TXM platform is free software: you can redistribute it
11
// and/or modify it under the terms of the GNU General Public
12
// License as published by the Free Software Foundation,
13
// either version 2 of the License, or (at your option) any
14
// later version.
15
//
16
// The TXM platform is distributed in the hope that it will be
17
// useful, but WITHOUT ANY WARRANTY; without even the implied
18
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
19
// PURPOSE. See the GNU General Public License for more
20
// details.
21
//
22
// You should have received a copy of the GNU General
23
// Public License along with the TXM platform. If not, see
24
// http://www.gnu.org/licenses.
25
//
26
// Version:
27
// $LastChangedDate: 2014-11-01 16:00:01 +0100 (sam., 1 nov. 2014) $
28
// $LastChangedRevision: XXXX $
29
// $LastChangedBy: sheiden $
30
//
31

    
32
//Title:
33
// EN:
34
// Macro to call directly internal CQP corpus engine statements
35
//
36
// Warning: This macro must be used by advanced users,
37
// at the risk of compromising CQP engine stability for the current TXM session
38
//
39
//FR:
40
// Macro de demande d'exécution directe d'instructions au moteur de corpus interne CQP
41
//
42
// Avertissement : Cette macro doit être utilisée par des utilisateurs avertis,
43
// au risque de compromettre la stabilité du moteur CQP pour la session TXM courante
44

    
45
// Necessary declarations - Déclarations préalables
46
import org.kohsuke.args4j.*
47
import groovy.transform.Field
48
import org.txm.rcpapplication.swt.widget.parameters.*
49
import org.txm.Toolbox
50
import org.txm.searchengine.cqp.*
51

    
52
// Sanity check - Vérification de cohérence
53
// Fetch the CQi client currently held by the Toolbox; the rest of the macro talks to CQP through it.
def CQI = Toolbox.getCqiClient();
54
// A NetCqiClient is rejected: per the message below, direct statement evaluation needs CQP in memory mode.
if ((CQI instanceof NetCqiClient)) {
55
        println "Error: CQP eval only available in CQP memory mode"
56
        // Stop the macro here, before the parameter dialog is shown.
        return;
57
}
58

    
59
// Parameter declaration - Déclaration du paramètre
60
// Single macro parameter: the text of the CQP statement to run. The annotation marks it as required,
// requests a "String" widget and declares "set da enpos;" as its default
// (presumably read by ParametersDialog.open(this) further down — confirm).
@Field @Option(name="statement", usage="CQP statement to execute (mind the ';' at the end)", widget="String", required=true, def="set da enpos;")
61
String statement;
62

    
63
// Useful CQP statements - Instructions CQP Utiles //
64
//
65
// 1] Set a CQP variable - régler une variable CQP :
66
// "set variable valeur;"
67
//
68
// Useful CQP variables
69
// A) da = DefaultNonbrackAttr variable setting: the implicit word property in subsequent CQL queries
70
//    For example, when DefaultNonbrackAttr=word, the "the" CQL query is interpreted by CQP as the '[word="the"]' query
71
// Example Statements:
72
// - set da word;     set implicit word property to "word" ('word' meaning graphical form of the word)
73
// - set da enlemma;        set implicit word property to "enlemma" ('enlemma' meaning TreeTagger based 'En'glish 'Lemma')
74
// - set da enpos;        set implicit word property to "enpos" ('enpos' meaning TreeTagger based 'En'glish 'P'art 'O'f 'S'peech)
75
//
76
// B) ms = MatchingStrategy variable setting: the CQL query word level iterators (?, *, +) resolution strategy used by CQP
77
// Possible Statements:
78
// - set ms shortest: ?, *, + word level operators should match sequences with the minimal number of words.
79
//                    Optional words at the beginning or at the end of the query are ignored.
80
// - set ms standard: early match of ?, *, + word level operators are considered.
81
//                    Optional words at the end of the query are ignored.
82
// - set ms longest: ?, *, + word level operators should match sequences with the maximum number of words.
83
// - set ms traditional: early match of ?, *, + word level operators are considered. Every sequence is returned.
84
//
85
// Example resolutions for each MatchingStrategy type 
86
// (from The CQP Query Language Tutorial, (CWB version 2.2.b90), Stefan Evert, 10 July 2005)
87
//
88
// for the query:
89
// [enpos="DET"]? [enpos="ADJ"]* [enpos="NN"] ([enpos="PREP"] [enpos="DET"]? [enpos="ADJ"]* [enpos="NN"])*
90
//
91
// Input:
92
//     the old book on the table in the room
93
//
94
// - 'shortest' match strategy: (3 matches)
95
// r1=         book
96
// r2=                     table
97
// r3=                                  room
98
//
99
// - 'longest' match strategy: (1 match)
100
// r1= the old book on the table in the room
101
//
102
// - 'standard' matching strategy: (3 matches)
103
// r1= the old book
104
// r2=                 the table
105
// r3=                              the room
106
//
107
// - 'traditional' matching strategy: (7 overlapping matches)
108
// r1= the old book
109
// r2=     old book
110
// r3=         book
111
// r4=                 the table
112
// r5=                     table
113
// r6=                              the room
114
// r7=                                  room
115
//
116
// C) sr = StrictRegions variable setting: CQL queries must match a single structure
117
// Possible Statements:
118
// - set sr on: ?, *, + word level operators cannot cross a boundary of the structure involved in the query
119
// - set sr off: ?, *, + word level operators can cross any number of boundaries of the structure involved in the query
120

    
121
// Parameters settings UI
122
// Show the parameter dialog for this script; when open() returns false, report it and stop
// without sending anything to CQP.
if (!ParametersDialog.open(this)) {
123
        // The message names this macro (ExecCQPMacro) so the console output points at the right script.
        println("** ExecCQPMacro error: Impossible to open Parameters settings UI dialog box.")
124
        return
125
}
126

    
127
// Actual CQP call
128
// Hand the statement text, unchanged, to the CQi client obtained above; the result of the call is not stored or printed here.
CQI.query(statement)
129

    
130
// End of the macro
131

    
132
/////////////////////////////////////
133
//// CQP variables documentation ////
134
/////////////////////////////////////
135

    
136
//// Useful CQP variables ///
137
// [da]        DefaultNonbrackAttr - implicit word property in queries ('word' by default)
138
// [ms]        MatchingStrategy    - shortest | standard | longest | traditional
139
// [sr]        StrictRegions       - single structure match
140

    
141
//// Maybe useful variables ////
142
// [o]        Optimize - simple regular expressions infix optimizer
143
// [hf]        HistoryFile
144
// [wh]        WriteHistory
145

    
146
//// Useless variables ////
147
// [p]         Paging
148
// [pg]         Pager
149
// [h]         Highlighting
150
// [col] Colour
151
// [pb]         ProgressBar
152
// [pp]         PrettyPrint
153
// [c]         Context
154
// [lc]         LeftContext
155
// [rc]         RightContext
156
// [ld]         LeftKWICDelim
157
// [rd]         RightKWICDelim
158
// [pm]         PrintMode
159
// [po]         PrintOptions
160
// [ps]         PrintStructures
161
// [sta] ShowTagAttributes
162
// [st]         ShowTargets
163
// [as]         AutoShow
164
// [es]         ExternalSort
165
// [esc] ExternalSortCommand
166
//              AutoSave
167
//              SaveOnExit
168
//              Timing
169

    
170
//// Variables that must not be changed ////
171
// [r]         Registry
172
// [dd]         DataDirectory
173
// [sub] AutoSubquery